In [61]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
#from sklearn.model_selection import train_test_split
#from sklearn import metrics
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn_extra.cluster import KMedoids
from scipy.spatial.distance import euclidean

In [62]:
def complexity(ts):
    return np.sqrt(np.sum(np.diff(ts)**2))
# CID Distance function
def cid_distance(ts1, ts2):
    # Ensure both inputs are numpy arrays
    ts1 = np.asarray(ts1)
    ts2 = np.asarray(ts2)

    # Step 1: Compute standard Euclidean distance
    dist = euclidean(ts1, ts2)
    
    # Step 2: Compute the complexities of both time series
    c_ts1 = complexity(ts1)
    c_ts2 = complexity(ts2)
    
    # Step 3: Apply the CID formula
    return dist * np.sqrt(c_ts1 / c_ts2)

In [63]:
#12x12 grid, monday(4/11) - smsin / friday(8/11)
data = pd.read_csv("../csv/12x12/classif_12x12_smsin.csv")
#data = pd.read_csv("../csv/20x20/classif20x20frid_smsin.csv")

data.head()

Unnamed: 0.1,Unnamed: 0,cellid,smsin0,smsin1,smsin2,smsin3,smsin4,smsin5,smsin6,smsin7,...,smsin15,smsin16,smsin17,smsin18,smsin19,smsin20,smsin21,smsin22,smsin23,WHF
0,0,4379,0.000378,0.000199,0.0,9e-06,0.000158,0.000204,0.000482,0.001992,...,0.004781,0.005944,0.00533,0.00547,0.0039,0.005816,0.003817,0.004231,0.000898,1
1,1,4380,0.001471,0.000311,2.8e-05,2e-06,0.000158,0.000302,0.000998,0.003448,...,0.008373,0.009585,0.010007,0.008427,0.007845,0.009887,0.006584,0.007445,0.00244,0
2,2,4381,0.001256,0.000577,5.2e-05,3e-05,8.6e-05,0.000308,0.000879,0.003441,...,0.006474,0.00629,0.006895,0.005045,0.004978,0.005282,0.003231,0.002469,0.001343,1
3,3,4382,0.002337,0.001663,0.000779,0.000445,0.00022,0.000678,0.002451,0.011564,...,0.023814,0.023962,0.02265,0.019704,0.017173,0.02117,0.016779,0.012174,0.008751,1
4,4,4383,0.002504,0.001642,0.000703,0.00039,0.000237,0.00071,0.002671,0.01231,...,0.022289,0.022932,0.023358,0.020317,0.018138,0.020992,0.017747,0.012685,0.009127,1


In [64]:
y = data.iloc[:,26] #h/w
x = data.iloc[:,2:26] #sms, calls, internet

In [65]:
y

0      1
1      0
2      1
3      1
4      1
      ..
139    1
140    1
141    1
142    2
143    1
Name: WHF, Length: 144, dtype: int64

In [66]:
x

Unnamed: 0,smsin0,smsin1,smsin2,smsin3,smsin4,smsin5,smsin6,smsin7,smsin8,smsin9,...,smsin14,smsin15,smsin16,smsin17,smsin18,smsin19,smsin20,smsin21,smsin22,smsin23
0,0.000378,0.000199,0.000000,0.000009,0.000158,0.000204,0.000482,0.001992,0.004211,0.005459,...,0.004141,0.004781,0.005944,0.005330,0.005470,0.003900,0.005816,0.003817,0.004231,0.000898
1,0.001471,0.000311,0.000028,0.000002,0.000158,0.000302,0.000998,0.003448,0.007770,0.008911,...,0.009113,0.008373,0.009585,0.010007,0.008427,0.007845,0.009887,0.006584,0.007445,0.002440
2,0.001256,0.000577,0.000052,0.000030,0.000086,0.000308,0.000879,0.003441,0.005561,0.008328,...,0.007181,0.006474,0.006290,0.006895,0.005045,0.004978,0.005282,0.003231,0.002469,0.001343
3,0.002337,0.001663,0.000779,0.000445,0.000220,0.000678,0.002451,0.011564,0.017392,0.020199,...,0.018495,0.023814,0.023962,0.022650,0.019704,0.017173,0.021170,0.016779,0.012174,0.008751
4,0.002504,0.001642,0.000703,0.000390,0.000237,0.000710,0.002671,0.012310,0.018621,0.021637,...,0.017875,0.022289,0.022932,0.023358,0.020317,0.018138,0.020992,0.017747,0.012685,0.009127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,0.000260,0.000134,0.000027,0.000042,0.000044,0.000064,0.000351,0.000940,0.002080,0.001895,...,0.002063,0.002171,0.002536,0.002225,0.002267,0.002355,0.002720,0.001677,0.001938,0.000917
140,0.001891,0.000466,0.000092,0.000371,0.000093,0.000377,0.001869,0.006120,0.011430,0.011743,...,0.010157,0.009427,0.014730,0.013157,0.013804,0.014975,0.013494,0.008704,0.008136,0.007718
141,0.001507,0.000049,0.000222,0.000009,0.000106,0.000193,0.001084,0.003321,0.004874,0.006255,...,0.005345,0.005217,0.006329,0.006694,0.006489,0.007942,0.004619,0.003900,0.002600,0.001745
142,0.002567,0.001161,0.000048,0.000346,0.000107,0.000184,0.001654,0.010575,0.020817,0.025802,...,0.025928,0.024838,0.031106,0.025331,0.021948,0.023962,0.019518,0.019559,0.017465,0.009439


In [67]:
knn = KNeighborsClassifier(n_neighbors=10)
scores1 = cross_val_score(knn, x, y, cv=5)
scores2 = cross_val_score(knn, x, y, cv=10)

In [68]:
#print(scores1.mean(dtype=np.float64))
#print(scores2)
print("5 fold: %0.2f accuracy with a standard deviation of %0.2f" % (scores1.mean(), scores1.std()))
print("10 fold: %f accuracy with a standard deviation of %f" % (scores2.mean(dtype=np.float64), scores2.std(dtype=np.float64)))
print(scores2)

5 fold: 0.40 accuracy with a standard deviation of 0.04
10 fold: 0.410476 accuracy with a standard deviation of 0.097376
[0.53333333 0.4        0.26666667 0.33333333 0.5        0.57142857
 0.42857143 0.28571429 0.35714286 0.42857143]


In [69]:
#y_predt = cross_val_predict(knn, x, y, cv=5) #10

In [70]:
data = pd.read_csv("../csv/12x12/classif_12x12_smsout.csv")
#data = pd.read_csv("../csv/20x20/classif20x20frid_smsout.csv")

data.head()
#20x20 grid, monday(4/11) - smsout / friday(8/11)

Unnamed: 0.1,Unnamed: 0,cellid,smsout0,smsout1,smsout2,smsout3,smsout4,smsout5,smsout6,smsout7,...,smsout15,smsout16,smsout17,smsout18,smsout19,smsout20,smsout21,smsout22,smsout23,WHF
0,0,4379,0.000506,0.000249,1e-05,2e-06,4.8e-05,5e-06,0.000482,0.001283,...,0.003694,0.002877,0.002676,0.003814,0.002715,0.003396,0.003801,0.002788,0.001381,1
1,1,4380,0.001622,0.001365,0.010339,0.011154,0.010858,0.011243,0.00889,0.002497,...,0.004583,0.005636,0.007947,0.013844,0.006955,0.005852,0.005189,0.005335,0.002228,0
2,2,4381,0.001115,0.00037,0.000138,0.0,8e-06,8e-06,0.000418,0.002093,...,0.003074,0.002176,0.002187,0.003226,0.002198,0.003144,0.001724,0.00197,0.001457,1
3,3,4382,0.00187,0.001042,0.001751,0.0,0.000118,0.000118,0.001525,0.011887,...,0.017044,0.013721,0.014986,0.016102,0.012975,0.016939,0.014571,0.012702,0.00993,1
4,4,4383,0.002081,0.001223,0.001724,1.9e-05,0.000104,0.000402,0.001724,0.013061,...,0.017347,0.015103,0.015889,0.017413,0.014364,0.01791,0.015497,0.013267,0.010571,1


In [71]:
y = data.iloc[:,26] #h/w
x = data.iloc[:,2:26] #sms, calls, internet

In [72]:
knn = KNeighborsClassifier(n_neighbors=10)
scores1 = cross_val_score(knn, x, y, cv=5)
scores2 = cross_val_score(knn, x, y, cv=10)

In [73]:
print("5 fold: %0.2f accuracy with a standard deviation of %0.2f" % (scores1.mean(), scores1.std()))
print("10 fold: %f accuracy with a standard deviation of %f" % (scores2.mean(), scores2.std()))
print(scores2)

5 fold: 0.42 accuracy with a standard deviation of 0.05
10 fold: 0.450476 accuracy with a standard deviation of 0.137457
[0.53333333 0.33333333 0.46666667 0.6        0.64285714 0.57142857
 0.5        0.28571429 0.21428571 0.35714286]


In [74]:
#y_predt = cross_val_predict(knn, x, y, cv=5) #10

In [75]:
data = pd.read_csv("../csv/12x12/classif_12x12_callin.csv")
#data = pd.read_csv("../csv/20x20/classif20x20frid_callin.csv")

data.head()
#20x20 grid, monday(4/11) - callin / friday(8/11)

Unnamed: 0.1,Unnamed: 0,cellid,callin0,callin1,callin2,callin3,callin4,callin5,callin6,callin7,...,callin15,callin16,callin17,callin18,callin19,callin20,callin21,callin22,callin23,WHF
0,0,4379,0.000204,0.000716,0.000118,0.0,0.0,0.000324,0.000149,0.002093,...,0.00915,0.013073,0.010728,0.009591,0.008018,0.004955,0.003183,0.001443,0.000482,1
1,1,4380,0.001311,0.000462,0.000277,0.0,0.000101,0.000388,0.000334,0.004638,...,0.014708,0.017623,0.016723,0.013522,0.013662,0.008128,0.004873,0.00261,0.001061,0
2,2,4381,0.000364,0.000249,0.0,0.0,0.0,0.000218,0.000499,0.004747,...,0.010988,0.011554,0.011801,0.008018,0.007783,0.00472,0.002418,0.000904,0.000619,1
3,3,4382,0.001501,0.000915,0.0,0.0,0.0,0.001501,0.002771,0.010943,...,0.031121,0.046033,0.046125,0.04856,0.024682,0.020695,0.009119,0.004223,0.004496,1
4,4,4383,0.001647,0.000863,1.4e-05,0.0,3.5e-05,0.00155,0.002651,0.011641,...,0.033412,0.046762,0.046555,0.051328,0.025946,0.021297,0.010219,0.004508,0.005045,1


In [76]:
y = data.iloc[:,26] #h/w
x = data.iloc[:,2:26] #sms, calls, internet

In [77]:
knn = KNeighborsClassifier(n_neighbors=10)
scores1 = cross_val_score(knn, x, y, cv=5)
scores2 = cross_val_score(knn, x, y, cv=10)

In [78]:
print("5 fold: %0.2f accuracy with a standard deviation of %0.2f" % (scores1.mean(), scores1.std()))
print("10 fold: %f accuracy with a standard deviation of %f" % (scores2.mean(dtype=np.float64), scores2.std(dtype=np.float64)))
print(scores2)

5 fold: 0.46 accuracy with a standard deviation of 0.05
10 fold: 0.471905 accuracy with a standard deviation of 0.105483
[0.46666667 0.6        0.4        0.46666667 0.5        0.71428571
 0.42857143 0.42857143 0.35714286 0.35714286]


In [79]:
#y_predt = cross_val_predict(knn, x, y, cv=5) #10

In [80]:
data2 = pd.read_csv("../csv/12x12/classif_12x12_callout.csv")
#data = pd.read_csv("../csv/20x20/classif20x20frid_callout.csv")

data2.head()
#20x20 grid, monday(4/11) - callout / friday(8/11)

Unnamed: 0.1,Unnamed: 0,cellid,callout0,callout1,callout2,callout3,callout4,callout5,callout6,callout7,...,callout15,callout16,callout17,callout18,callout19,callout20,callout21,callout22,callout23,WHF
0,0,4379,1e-05,9.7e-05,0.0,0.0,5e-06,0.000152,0.000584,0.002842,...,0.009999,0.009711,0.011318,0.008949,0.005031,0.005754,0.002906,0.001478,0.0006,1
1,1,4380,0.000157,5.7e-05,0.0,9e-06,0.0,0.000237,0.000978,0.006365,...,0.017284,0.016388,0.019477,0.015358,0.011301,0.008685,0.005553,0.002499,0.000675,0
2,2,4381,0.000294,4e-05,2.3e-05,1.7e-05,0.0,0.000282,0.000605,0.006603,...,0.010329,0.010004,0.012635,0.010447,0.005459,0.004089,0.003268,0.001387,0.000201,1
3,3,4382,0.000525,0.000497,0.000245,0.000252,0.0,0.000538,0.001771,0.013803,...,0.034587,0.036961,0.052016,0.038642,0.024501,0.022786,0.009294,0.00922,0.002008,1
4,4,4383,0.000594,0.000537,0.000273,0.000308,0.0,0.000442,0.00183,0.014267,...,0.036211,0.037631,0.053166,0.038921,0.02418,0.022625,0.010503,0.009822,0.0021,1


In [81]:
y = data2.iloc[:,26] #h/w
x = data2.iloc[:,2:26] #sms, calls, internet

In [82]:
knn = KNeighborsClassifier(n_neighbors=10, metric=cid_distance)
scores1 = cross_val_score(knn, x, y, cv=5)
scores2 = cross_val_score(knn, x, y, cv=10)

In [83]:
print("5 fold: %0.2f accuracy with a standard deviation of %0.2f" % (scores1.mean(), scores1.std()))
print("10 fold: %f accuracy with a standard deviation of %f" % (scores2.mean(dtype=np.float64), scores2.std(dtype=np.float64)))
print(scores2)

5 fold: 0.51 accuracy with a standard deviation of 0.05
10 fold: 0.493810 accuracy with a standard deviation of 0.085265
[0.53333333 0.26666667 0.53333333 0.53333333 0.5        0.5
 0.57142857 0.5        0.57142857 0.42857143]


In [84]:
y_predt = cross_val_predict(knn, x, y, cv=10) #10

In [85]:
data = pd.read_csv("../csv/12x12/classif_12x12_internet.csv")
#data = pd.read_csv("../csv/20x20/classif20x20frid_internet.csv")

data.head()
#20x20 grid, monday(4/11) - internet / friday(8/11)

Unnamed: 0.1,Unnamed: 0,cellid,internet0,internet1,internet2,internet3,internet4,internet5,internet6,internet7,...,internet15,internet16,internet17,internet18,internet19,internet20,internet21,internet22,internet23,WHF
0,0,4379,0.00153,0.001053,0.00094,0.000798,0.001417,0.001186,0.002872,0.005092,...,0.005689,0.006374,0.006208,0.006848,0.005161,0.005127,0.003854,0.002673,0.001918,1
1,1,4380,0.003506,0.003243,0.002743,0.002384,0.002455,0.002713,0.004548,0.00878,...,0.009587,0.010829,0.011307,0.010931,0.010601,0.010218,0.008465,0.006533,0.00536,0
2,2,4381,0.001677,0.000709,0.000468,0.000421,0.000374,0.000791,0.001445,0.003357,...,0.003838,0.00546,0.005321,0.004671,0.004413,0.00471,0.00434,0.002465,0.001909,1
3,3,4382,0.017321,0.009843,0.008162,0.008914,0.008936,0.010223,0.014441,0.021248,...,0.021898,0.02319,0.025866,0.026859,0.024065,0.02454,0.025041,0.018586,0.014938,1
4,4,4383,0.018642,0.010206,0.008517,0.009201,0.009244,0.010382,0.014655,0.022601,...,0.024213,0.025071,0.028383,0.028785,0.025767,0.026371,0.026844,0.019979,0.015868,1


In [86]:
y = data.iloc[:,26] #h/w
x = data.iloc[:,2:26] #sms, calls, internet

In [87]:
knn = KNeighborsClassifier(n_neighbors=10)
scores1 = cross_val_score(knn, x, y, cv=5)
scores2 = cross_val_score(knn, x, y, cv=10)
print(np.std(scores2))

0.04053217416888887


In [88]:
print("5 fold: %0.2f accuracy with a standard deviation of %0.2f" % (scores1.mean(), scores1.std()))
print("10 fold: %f accuracy with a standard deviation of %f" % (scores2.mean(dtype=np.float64), scores2.std(dtype=np.float64)))
print(scores2)

5 fold: 0.44 accuracy with a standard deviation of 0.05
10 fold: 0.445238 accuracy with a standard deviation of 0.040532
[0.46666667 0.4        0.4        0.4        0.42857143 0.5
 0.42857143 0.5        0.42857143 0.5       ]


In [89]:
#y_predt = cross_val_predict(knn, x, y, cv=5) #10

In [90]:
predicted = pd.DataFrame(np.transpose(y_predt),columns=['predicted'])
predicted['cellid']=data2.cellid
predicted.head()

Unnamed: 0,predicted,cellid
0,1,4379
1,2,4380
2,1,4381
3,1,4382
4,1,4383


In [91]:
predicted.to_csv('../csv/predicted_co_12x12_cid.csv')