In [248]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns

In [249]:
road_pt = pd.read_csv("mergeAccData.csv")
df = pd.DataFrame(road_pt)
df

Unnamed: 0,Latitude,Longitude,timestamp,xaxis,yaxis,zaxis,Landmark
0,12.972873,79.157654,1.000000e+13,0.0,0.0,2.081157,Albert Einstein Block
1,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block
2,12.972873,79.157654,1.000000e+13,0.0,0.0,2.069193,Albert Einstein Block
3,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block
4,12.972873,79.157654,1.000000e+13,0.0,0.0,2.284547,Albert Einstein Block
...,...,...,...,...,...,...,...
5056,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1217
5057,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1218
5058,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1219
5059,12.972924,79.163299,1.100000e+13,0.0,0.0,2.556133,X5F7+1220


In [250]:
np.percentile(df.zaxis, [99, 90, 30])

array([5.51844692, 3.57487726, 2.08295155])

In [251]:
label = []
for i in df.zaxis:
    if i>5.51844692:
        label.append("MAJOR UNDULATION")
    elif i<5.51844692 and i>3.57487726:
        label.append("BAD")
    elif i>2.08295155 and i<3.57487726:  
        label.append("NORMAL")
    elif i<2.08295155:  
        label.append("GOOD")

In [252]:
df['Label_For_The_Road_Section'] = label
df #

Unnamed: 0,Latitude,Longitude,timestamp,xaxis,yaxis,zaxis,Landmark,Label_For_The_Road_Section
0,12.972873,79.157654,1.000000e+13,0.0,0.0,2.081157,Albert Einstein Block,GOOD
1,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block,NORMAL
2,12.972873,79.157654,1.000000e+13,0.0,0.0,2.069193,Albert Einstein Block,GOOD
3,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block,NORMAL
4,12.972873,79.157654,1.000000e+13,0.0,0.0,2.284547,Albert Einstein Block,NORMAL
...,...,...,...,...,...,...,...,...
5056,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1217,GOOD
5057,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1218,GOOD
5058,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1219,GOOD
5059,12.972924,79.163299,1.100000e+13,0.0,0.0,2.556133,X5F7+1220,NORMAL


In [253]:
dfBad = df.loc[df["Label_For_The_Road_Section"] == "BAD"]
dfBad

Unnamed: 0,Latitude,Longitude,timestamp,xaxis,yaxis,zaxis,Landmark,Label_For_The_Road_Section
143,12.972873,79.157654,1.000000e+13,0.000000,0.0,4.045664,Albert Einstein Block,BAD
265,12.969623,79.158014,1.000000e+13,0.000000,0.0,4.039085,Amartya Sen Avenue,BAD
280,12.969623,79.158014,1.000000e+13,0.000000,0.0,4.220939,Amartya Sen Avenue,BAD
299,12.969623,79.158014,1.000000e+13,0.000000,0.0,4.931010,Amartya Sen Avenue,BAD
397,12.969623,79.158014,1.000000e+13,0.000000,0.0,4.039085,Amartya Sen Avenue,BAD
...,...,...,...,...,...,...,...,...
4648,12.972857,79.164553,5.040000e+12,0.000000,0.0,3.626024,X5F7+4HR,BAD
4669,12.972857,79.164553,5.040000e+12,0.000000,0.0,3.863811,X5F7+4HR,BAD
4687,12.972857,79.164553,5.040000e+12,0.000000,0.0,3.752844,X5F7+4HR,BAD
4689,12.972806,79.164353,5.040000e+12,0.000000,0.0,3.632006,X5F7+4HR,BAD


In [254]:
groupedBad = dfBad.groupby(['Latitude', 'Longitude']).size().reset_index(name='BAD_Presence(FREQUENCY)')
dfB = pd.DataFrame(groupedBad)
dfB
df

Unnamed: 0,Latitude,Longitude,timestamp,xaxis,yaxis,zaxis,Landmark,Label_For_The_Road_Section
0,12.972873,79.157654,1.000000e+13,0.0,0.0,2.081157,Albert Einstein Block,GOOD
1,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block,NORMAL
2,12.972873,79.157654,1.000000e+13,0.0,0.0,2.069193,Albert Einstein Block,GOOD
3,12.972873,79.157654,1.000000e+13,0.0,0.0,2.194218,Albert Einstein Block,NORMAL
4,12.972873,79.157654,1.000000e+13,0.0,0.0,2.284547,Albert Einstein Block,NORMAL
...,...,...,...,...,...,...,...,...
5056,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1217,GOOD
5057,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1218,GOOD
5058,12.972924,79.163299,1.100000e+13,0.0,0.0,2.053639,X5F7+1219,GOOD
5059,12.972924,79.163299,1.100000e+13,0.0,0.0,2.556133,X5F7+1220,NORMAL


In [255]:
# # merged_df = pd.merge(groupedBad, df, on=['Latitude','Longitude','Landmark'])
# # merged_df
# merged_df = pd.merge(groupedBad, df, on=['Latitude','Longitude'], how='left')
# merged_df

In [256]:
# groupedBad = merged_df.groupby(['Latitude', 'Longitude','Landmark']).size()
# groupedBad

In [257]:
dfMajor=df.loc[df["Label_For_The_Road_Section"] == "MAJOR UNDULATION"]
dfMajor

Unnamed: 0,Latitude,Longitude,timestamp,xaxis,yaxis,zaxis,Landmark,Label_For_The_Road_Section
291,12.969623,79.158014,10000000000000.0,0.0,0.0,5.539384,Amartya Sen Avenue,MAJOR UNDULATION
926,12.972435,79.159225,10000000000000.0,0.0,0.0,6.113064,Mens Hostel Office,MAJOR UNDULATION
929,12.972435,79.159225,10000000000000.0,0.0,0.0,5.518447,Mens Hostel Office,MAJOR UNDULATION
939,12.972435,79.159225,10000000000000.0,0.0,0.0,5.770292,Mens Hostel Office,MAJOR UNDULATION
955,12.972435,79.159225,10000000000000.0,0.0,0.0,5.770292,Mens Hostel Office,MAJOR UNDULATION
966,12.972435,79.159225,10000000000000.0,0.0,0.0,5.794219,Mens Hostel Office,MAJOR UNDULATION
968,12.972435,79.159225,10000000000000.0,0.0,0.0,6.653244,Mens Hostel Office,MAJOR UNDULATION
969,12.972435,79.159225,10000000000000.0,0.0,0.0,6.340382,Mens Hostel Office,MAJOR UNDULATION
982,12.972435,79.159225,10000000000000.0,0.0,0.0,7.155138,Mens Hostel Office,MAJOR UNDULATION
1042,12.972435,79.159225,10000000000000.0,0.0,0.0,5.794219,Mens Hostel Office,MAJOR UNDULATION


In [258]:
groupedMaj = dfMajor.groupby(['Latitude', 'Longitude']).size().reset_index(name='MAJOR_UNDULATION_PRESENCE(FREQUENCY)')
dfM = pd.DataFrame(groupedMaj)
dfM

Unnamed: 0,Latitude,Longitude,MAJOR_UNDULATION_PRESENCE(FREQUENCY)
0,12.969623,79.158014,1
1,12.972232,79.160922,4
2,12.972435,79.159225,18
3,12.972563,79.161294,9
4,12.972906,79.163959,5
5,12.972922,79.157448,15


In [282]:
inner_merged_df = pd.merge(dfB, dfM, on='Latitude', how='outer')
dfMe = pd.DataFrame(inner_merged_df)
dfMe

Unnamed: 0,Latitude,Longitude_x,BAD_Presence(FREQUENCY),Longitude_y,MAJOR_UNDULATION_PRESENCE(FREQUENCY)
0,12.969623,79.158014,10,79.158014,1.0
1,12.97194,79.159615,121,,
2,12.972232,79.160922,17,79.160922,4.0
3,12.972435,79.159225,26,79.159225,18.0
4,12.972463,79.161559,39,,
5,12.972472,79.159124,12,,
6,12.972563,79.161294,29,79.161294,9.0
7,12.972684,79.162909,34,,
8,12.972711,79.159446,5,,
9,12.972732,79.162874,13,,


In [283]:
dfMe.drop(labels=['Longitude_y'], axis=1)

Unnamed: 0,Latitude,Longitude_x,BAD_Presence(FREQUENCY),MAJOR_UNDULATION_PRESENCE(FREQUENCY)
0,12.969623,79.158014,10,1.0
1,12.97194,79.159615,121,
2,12.972232,79.160922,17,4.0
3,12.972435,79.159225,26,18.0
4,12.972463,79.161559,39,
5,12.972472,79.159124,12,
6,12.972563,79.161294,29,9.0
7,12.972684,79.162909,34,
8,12.972711,79.159446,5,
9,12.972732,79.162874,13,


In [260]:
lat = df['Latitude'] 
long = df['Longitude']
z = df['zaxis']

In [261]:
l = []
m = []
n = []
c = [] 
l.append(lat[0])
m.append(long[0])
n.append(z[0])
c.append(1)
i=1
def fun():
     for i in range(len(lat)):
            if lat[i] in l:
                index = l.index(lat[i])
                if long[i] == m[index]:
                    n[index] = n[index] + z[i]
                    c[index] = c[index] +1
                else:
                    l.append(lat[i])
                    m.append(long[i])
                    n.append(z[i])
                    c.append(1)
            else:
                l.append(lat[i])
                m.append(long[i])
                n.append(z[i])
                c.append(1)

In [262]:
fun() 

In [263]:
df2 = pd.DataFrame(list(zip(l,m,n,c)),
               columns =['Latitude', 'Longitude','zaxis','count'])

In [264]:
df2['zaxisAverage'] = df2['zaxis'] / df2['count']
df2

Unnamed: 0,Latitude,Longitude,zaxis,count,zaxisAverage
0,12.972873,79.157654,526.669135,239,2.203637
1,12.969623,79.158014,858.90308,342,2.511413
2,12.972463,79.161559,793.079582,276,2.873477
3,12.972257,79.157525,33.327225,16,2.082952
4,12.972472,79.159124,728.135921,280,2.600485
5,12.972435,79.159225,957.712271,340,2.816801
6,12.972732,79.162874,394.093715,154,2.55905
7,12.972684,79.162909,832.608451,339,2.456072
8,12.972618,79.162596,180.992549,66,2.742311
9,12.973416,79.164118,200.52093,254,0.789452


In [265]:
import statistics
average = statistics.mean(df2.zaxisAverage)
std_dev = statistics.stdev(df2.zaxisAverage)

In [266]:
a = average + std_dev 
b = average - std_dev

In [267]:
a

3.00648025320418

In [268]:
b

1.8183698157620638

In [269]:
labell = []
for i in df2.zaxisAverage:
    if i>a:
        labell.append("BAD")
    elif i<a and i>b:
        labell.append("NORMAL")
    elif i<b:  
        labell.append("GOOD")

In [270]:
df2['Label_For_The_Road_Section'] = labell
df2

Unnamed: 0,Latitude,Longitude,zaxis,count,zaxisAverage,Label_For_The_Road_Section
0,12.972873,79.157654,526.669135,239,2.203637,NORMAL
1,12.969623,79.158014,858.90308,342,2.511413,NORMAL
2,12.972463,79.161559,793.079582,276,2.873477,NORMAL
3,12.972257,79.157525,33.327225,16,2.082952,NORMAL
4,12.972472,79.159124,728.135921,280,2.600485,NORMAL
5,12.972435,79.159225,957.712271,340,2.816801,NORMAL
6,12.972732,79.162874,394.093715,154,2.55905,NORMAL
7,12.972684,79.162909,832.608451,339,2.456072,NORMAL
8,12.972618,79.162596,180.992549,66,2.742311,NORMAL
9,12.973416,79.164118,200.52093,254,0.789452,GOOD


In [271]:
df2nd = pd.DataFrame(df2)
df2nd

Unnamed: 0,Latitude,Longitude,zaxis,count,zaxisAverage,Label_For_The_Road_Section
0,12.972873,79.157654,526.669135,239,2.203637,NORMAL
1,12.969623,79.158014,858.90308,342,2.511413,NORMAL
2,12.972463,79.161559,793.079582,276,2.873477,NORMAL
3,12.972257,79.157525,33.327225,16,2.082952,NORMAL
4,12.972472,79.159124,728.135921,280,2.600485,NORMAL
5,12.972435,79.159225,957.712271,340,2.816801,NORMAL
6,12.972732,79.162874,394.093715,154,2.55905,NORMAL
7,12.972684,79.162909,832.608451,339,2.456072,NORMAL
8,12.972618,79.162596,180.992549,66,2.742311,NORMAL
9,12.973416,79.164118,200.52093,254,0.789452,GOOD


In [285]:
left_merged_df = pd.merge(df2nd, dfMe, on=['Latitude'], how='left')
df3rd = pd.DataFrame(left_merged_df)
df3rd

Unnamed: 0,Latitude,Longitude,zaxis,count,zaxisAverage,Label_For_The_Road_Section,Longitude_x,BAD_Presence(FREQUENCY),Longitude_y,MAJOR_UNDULATION_PRESENCE(FREQUENCY)
0,12.972873,79.157654,526.669135,239,2.203637,NORMAL,79.157654,1.0,,
1,12.969623,79.158014,858.90308,342,2.511413,NORMAL,79.158014,10.0,79.158014,1.0
2,12.972463,79.161559,793.079582,276,2.873477,NORMAL,79.161559,39.0,,
3,12.972257,79.157525,33.327225,16,2.082952,NORMAL,,,,
4,12.972472,79.159124,728.135921,280,2.600485,NORMAL,79.159124,12.0,,
5,12.972435,79.159225,957.712271,340,2.816801,NORMAL,79.159225,26.0,79.159225,18.0
6,12.972732,79.162874,394.093715,154,2.55905,NORMAL,79.162874,13.0,,
7,12.972684,79.162909,832.608451,339,2.456072,NORMAL,79.162909,34.0,,
8,12.972618,79.162596,180.992549,66,2.742311,NORMAL,,,,
9,12.973416,79.164118,200.52093,254,0.789452,GOOD,79.164118,5.0,,


In [286]:
df3rd.drop(labels=['Longitude_y','Longitude_x','zaxis','count'], axis=1)

Unnamed: 0,Latitude,Longitude,zaxisAverage,Label_For_The_Road_Section,BAD_Presence(FREQUENCY),MAJOR_UNDULATION_PRESENCE(FREQUENCY)
0,12.972873,79.157654,2.203637,NORMAL,1.0,
1,12.969623,79.158014,2.511413,NORMAL,10.0,1.0
2,12.972463,79.161559,2.873477,NORMAL,39.0,
3,12.972257,79.157525,2.082952,NORMAL,,
4,12.972472,79.159124,2.600485,NORMAL,12.0,
5,12.972435,79.159225,2.816801,NORMAL,26.0,18.0
6,12.972732,79.162874,2.55905,NORMAL,13.0,
7,12.972684,79.162909,2.456072,NORMAL,34.0,
8,12.972618,79.162596,2.742311,NORMAL,,
9,12.973416,79.164118,0.789452,GOOD,5.0,


In [289]:
df3rd.to_csv("C:/Users/HP/Downloads/output.csv",index=False)

In [160]:
# merged_df = pd.merge(groupedMaj, df2, on=['Latitude','Longitude'])
# merged_df

In [107]:
# la = df2['Latitude'] 
# lon = df2['Longitude']
# latt = groupedMaj['Latitude'] 
# longg = groupedMaj['Longitude']
# freqqm = groupedMaj['FreqMaj']
# lab = []
# for i in range(len(latt)):
#     if latt[i] in la:
#         index = la.index(latt[i])
#         lab[index]= freqqm[i]

In [110]:
# Lat = df['Latitude'] 
# Long = df['Longitude']
# lab = df['Label_For_The_Road_Section']
# l = []
# m = []
# n = []
# l.append(lat[0])
# m.append(long[0])
#  def fun1():
#      for i in range(len(lat)):
#             if lat[i] in l:
#                 index = l.index(lat[i])
#                 if long[i] == m[index]:
#                     label_counts = Counter(lab)
#                     n[index]=label_counts
#                 else:
#                     l.append(lat[i])
#                     m.append(long[i])
#                     n.append(z[i])
                    
#             else:
#                 l.append(lat[i])
#                 m.append(long[i])
#                 n.append(z[i])
# fun1()
# df3 = pd.DataFrame(list(zip(l,m,n)),
#                columns =['lat', 'long','lab'])
# df3

In [80]:
# Lat = df['Latitude'] 
# Long = df['Longitude']
# z = df['zaxis']
# # lab = df['Label For The Road Section']

In [81]:
# df2 = pd.DataFrame(list(zip(l,m,n)),
#                columns =['lat', 'long','zaxis'])
# df2

In [82]:
# df.lat.duplicated().sum()

In [83]:
# label = df['Label For The Road Section']

In [84]:
# majorFreq = []
# badFreq = []
# count1 = 0
# count2 = 0
# for i in the range(len(df2)):
#     if df2.lat[i] in df:
#         if df.Label For The Road Section == "MAJOR UNDUALTION":
#             count1++
#         elif df.Label For The Road Section == "BAD":
#             count2++
    

    

In [85]:
# def sum(lst):
#     sume=0
#     for i in lst:
#         sume=sume+i
#     return sume

# n=[2,3,4,5,6,7]
# sum(n)
# print(sum(n))

In [None]:
# lst=[1]
# len(lst)