-
Notifications
You must be signed in to change notification settings - Fork 2
/
cloud_out_mms.py
executable file
·91 lines (73 loc) · 4.66 KB
/
cloud_out_mms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*-- encoding=utf-8 --*-
import pandas as pd
import xlsxwriter
import os
import platform
from pandas import ExcelWriter
from util import main_function,plot_trend
def clean_data(df_csv_datas):
'''
数据清洗
'''
row_length_before = df_csv_datas.shape[0]
print('\t清理--清除一些用不到的字段')
df_csv_datas = df_csv_datas.drop(['emmcid','上报时间','异常进程名','进程版本名','进程版本号','异常进程包名','软件系统类型',
'国家','省/直辖市','市','县/区','详细地址','异常类型','结束位置码','结束基站编号','结束电话网络','结束数据网络',
'isim支持情况','MBN版本信息','VOLTE配置信息','是否volte','呼叫对方号码','保留字段一','保留字段二','异常次数','日志路径','log信息'],axis=1)
df_csv_datas = df_csv_datas.fillna('-1')
print('\t运营商--移除测试的PLMN')
fp = open(os.path.join(os.path.abspath('.'),'config','remove_test_plmn.txt'),'r')
test_plmn_list = [plmn.strip() for plmn in fp.readlines()]
df_csv_datas = df_csv_datas[-df_csv_datas['运营商'].isin(test_plmn_list)]
fp.close()
print('\t起呼电话网络--移除起呼电话网络为UNKNOWN')
df_csv_datas=df_csv_datas.loc[df_csv_datas['起呼电话网络'] != 'UNKNOWN']
print('\tIMEI--移除测试的IMEI')
fp = open(os.path.join(os.path.abspath('.'),'config','remove_test_imei.txt'),'r')
test_imei_list = [imei.strip() for imei in fp.readlines()]
df_csv_datas = df_csv_datas[-df_csv_datas['imei'].isin(test_imei_list)]
fp.close()
print('\t合并一些共同分析的字段')
df_csv_datas['开始基站位置'] = df_csv_datas['运营商'].str.cat(df_csv_datas['起呼位置码'],sep='/').str.cat(df_csv_datas['起呼基站编号'],sep='/')
df_csv_datas = df_csv_datas.drop(['起呼位置码','起呼基站编号'],axis=1)
df_csv_datas['运营商_数据网络'] = df_csv_datas['运营商'].str.cat(df_csv_datas['开始数据网络'], sep='/')
df_csv_datas['机型'] = df_csv_datas['外部机型']
df_csv_datas = df_csv_datas.drop(['外部机型','内部机型'],axis=1)
print('\t发生时间--提取发生的小时')
df_csv_datas['发生时间1'] = pd.to_datetime(df_csv_datas['发生时间'],infer_datetime_format=True)
df_csv_datas['发生时间h'] = df_csv_datas['发生时间1'].apply(get_hour)
df_csv_datas = df_csv_datas.drop(['发生时间','发生时间1'],axis=1)
print('\t出现异常的卡--替换sim卡的定义')
df_csv_datas['出现异常的卡'] = df_csv_datas['出现异常的卡'].apply(replace_sim)
row_length_after = df_csv_datas.shape[0]
print('\t数据清洗前后的数量='+str(row_length_after)+'/'+str(row_length_before)+',数据清洗比率='+str(row_length_after*100/row_length_before)+'%')
return df_csv_datas
def replace_sim(sim):
if(sim == '0'):
return '卡1'
elif(sim == '1'):
return '卡2'
else:
return '-1'
def get_hour(name):
returnName=name.to_pydatetime().hour
return returnName
def cloud_out_mms_main(path_raw_data,path_result):
main_function('云诊断外销MMS', path_raw_data, path_result, clean_data)
def cloud_out_mms_plot_trend(path_raw_data,path_result):
sheet_name_list=['SIM卡', '失败原因', '呼入或呼出', '运营商', '电话网络', '发生时间h', '机型', '系统版本', 'PLMN_CS']
trend_dics_list={}
trend_dics_list['出现异常的卡']=['卡1','卡2']
trend_dics_list['失败原因']=['3','Acquiring network timed out','-----StatusCode====0','Not Found-----StatusCode====404','Gateway Timeout-----StatusCode====504']
trend_dics_list['起呼电话网络'] = ['LTE','HSPA','UMTS','GSM','EDGE','GPRS','HSPA+','HSDPA']
trend_dics_list['开始数据网络'] = ['LTE','HSPA','UMTS','GSM','EDGE','GPRS','HSPA+','HSDPA']
trend_dics_list['机型']=['PD1708F','PD1708F_EX','PD1705F_EX','PD1612DF_EX','PD1612F_EX','PD1612BF_EX','PD1624F_EX']
trend_dics_list['IMEI频次'] = ['PD1708F','PD1708F_EX','PD1705F_EX','PD1612DF_EX','PD1612F_EX','PD1612BF_EX','PD1624F_EX']
trend_dics_list['国家1'] = ['Philippines','India','Indonesia','Myanmar (Burma)','Thailand','Malaysia','Pakistan','Viet Nam']
# plot_trend('云诊断外销MMS', path_raw_data, path_result, trend_dics_list)
if __name__ == '__main__':
path1=os.path.abspath('/opt/vivo-home/bigdata_cloud_datas/cloud_in_mms_rawdata/weeks')
path2=os.path.abspath('/opt/vivo-home/bigdata_cloud_datas/cloud_in_mms_reportdata/weeks')
cloud_in_sms_main(path1,path2)
path3=os.path.join('/opt/vivo-home/bigdata_cloud_datas/cloud_in_mms_reportdata', '云诊断内销MMS周趋势.xlsx')
cloud_in_sms_plot_trend(path2,path3)