In [84]:
# Run this whenever you shut down the VM
# !pip3 install cassandra-driver --user
# !pip install folium
# !pip install --upgrade pip

In [85]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
from cassandra import ConsistencyLevel
import datetime
import folium



import pandas as pd

def pandas_factory(colnames, rows):
    return pd.DataFrame(rows, columns=colnames)
def connect():
    cluster = Cluster(
        contact_points=['172.31.22.32'], 
        auth_provider = PlainTextAuthProvider(username='tomcat', password='drawsweb'),
        connect_timeout = 20,
        control_connection_timeout=None
    )
    session = cluster.connect()
    session.set_keyspace('disruptionwarning')
    session.row_factory = pandas_factory
    session.default_fetch_size = 1000000 #needed for large queries, otherwise driver will do pagination. Default is 50000.
    return session

session=connect()

# Set variables for analysis

In [86]:
dest='KSFO'
airline='UAL'
flightno='2744'
fromdt=datetime.date(2019,10,13)
todt=datetime.date(2019,10,14)
regno='N57864'

In [87]:
# Flights added to the registry from flightplans_destination table
# SELECT * FROM flightplans_destination where dest = 'KSFO' AND airline='UAL' and flightno='2744' and departure > '2019-10-13'  allow filtering;
query="SELECT * FROM flightplans_destination where dest = ? AND airline=? and flightno=? and departure >= ? and departure <= ?  allow filtering"
fp_dest_stmt=session.prepare(query)
fp_dest_stmt.consistency_level = ConsistencyLevel.LOCAL_ONE

rows_dest = session.execute(fp_dest_stmt, [dest,airline,flightno,fromdt,todt])
df_dest = rows_dest._current_rows 
df_dest_part=df_dest[['dest','departure','airline','flightno','received','id','registration','timestamp']]

#Cancellation query
query="SELECT * FROM cancellation WHERE airline=? AND flightno=? AND timestamp >= ? AND timestamp <= ?"
fp_cancel_stmt=session.prepare(query)
fp_cancel_stmt.consistency_level = ConsistencyLevel.LOCAL_ONE

rows_cancel = session.execute(fp_cancel_stmt, [airline,flightno,fromdt,todt])
df_cancel = rows_cancel._current_rows 
df_cancel_part=df_cancel[['id','origin']]

#joining based on id, in case origin is NaN then there are no records for the id in the cancellation table
df_merge=pd.merge(df_dest_part,df_cancel_part,left_on='id',right_on='id', how='left')
df_merge


Unnamed: 0,dest,departure,airline,flightno,received,id,registration,timestamp,origin
0,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:01:23,UAL2744-1570921283-fa-0000,,2019-10-13 04:40:00,
1,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:01:24,UAL2744-1570921283-fa-0000,,2019-10-13 04:40:00,
2,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:04:16,UAL2744-1570921283-fa-0000,,2019-10-13 04:40:00,
3,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:05:09,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
4,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:05:11,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
5,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 00:45:08,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
6,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:10:03,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
7,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:09,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
8,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:10,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,
9,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:41,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00,


In [88]:
df_dest

Unnamed: 0,dest,departure,airline,flightno,received,altitude,arrival,duration,equipment,id,orig,registration,speed,status,timestamp,waypoints
0,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:01:23,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
1,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:01:24,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
2,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:04:16,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
3,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:05:09,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
4,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-12 23:05:11,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
5,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 00:45:08,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
6,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:10:03,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
7,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:09,0,2019-10-13 08:12:00,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
8,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:10,0,2019-10-13 08:09:00,209,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
9,KSFO,2019-10-13 04:40:00,UAL,2744,2019-10-13 01:39:41,34000,2019-10-13 07:57:58,197,B753,UAL2744-1570921283-fa-0000,KSAT,N57864,482,F,2019-10-13 04:40:00,"[{""lon"":-98.47,""lat"":29.53},{""lon"":-98.52,""lat..."


In [89]:
#positions table correlate with records fro fp arrival departure
query="SELECT * FROM position WHERE airline=? AND flightno=? AND timestamp >= ? and timestamp <= ? ALLOW FILTERING"
fp_pos_lookup=session.prepare(query)
fp_pos_lookup.consistency_level = ConsistencyLevel.LOCAL_ONE

rows_pos = session.execute(fp_pos_lookup, [airline,flightno,fromdt,todt])
df_pos = rows_pos._current_rows 
df_pos


Unnamed: 0,airline,flightno,timestamp,altitude,heading,latitude,longitude,registration,type
0,UAL,2744,2019-10-13 04:25:33,675.0,131.0,-98.47537,29.53487,N57864,A
1,UAL,2744,2019-10-13 04:25:49,1225.0,132.0,-98.46475,29.52650,N57864,A
2,UAL,2744,2019-10-13 04:26:05,2025.0,133.0,-98.45541,29.51898,N57864,A
3,UAL,2744,2019-10-13 04:26:21,2425.0,132.0,-98.44466,29.51039,N57864,A
4,UAL,2744,2019-10-13 04:26:37,3100.0,132.0,-98.43222,29.50058,N57864,A
5,UAL,2744,2019-10-13 04:26:53,3850.0,120.0,-98.41784,29.49061,N57864,A
6,UAL,2744,2019-10-13 04:27:09,4600.0,89.0,-98.39704,29.48698,N57864,A
7,UAL,2744,2019-10-13 04:27:25,5100.0,63.0,-98.37627,29.49201,N57864,A
8,UAL,2744,2019-10-13 04:27:44,6050.0,34.0,-98.35600,29.50809,N57864,A
9,UAL,2744,2019-10-13 04:28:00,6950.0,8.0,-98.34665,29.53088,N57864,A


In [90]:
#create a map
this_map = folium.Map()
for index, row in df_pos.iterrows():
    folium.CircleMarker([row['longitude'], row['latitude']],
                        radius=1,
                        popup=row['timestamp'],
                        fill_color="#3db7e4", # divvy color
                       ).add_to(this_map)
#     print(row['latitude'], row['longitude'])
this_map.fit_bounds(this_map.get_bounds())
this_map

In [91]:
# #timestamp vs altitude
# #timestamp vs speed
# from  matplotlib import pyplot as plt
# %matplotlib inline
# import seaborn as sns

# # Use seaborn style defaults and set the default figure size
# sns.set(rc={'figure.figsize':(11, 4)})

# # df_pos['altitude'].plot
# # pyplot.show()
# df_chart=df_pos[['timestamp','altitude']]
# # df_chart.plot(linewidth=0.5)
# # pyplot.show()
# df_chart.dtypes




# from matplotlib import dates as mPlotDATEs   # helper functions num2date()
# #                                            #              and date2num()
# #                                            #              to convert to/from.
# from matplotlib.dates   import  DateFormatter,    \
#                                 AutoDateLocator,   \
#                                 HourLocator,        \
#                                 MinuteLocator,       \
#                                 epoch2num
# from matplotlib.ticker  import  ScalarFormatter, FuncFormatter
# # plt.set_xlim( x_min, x_MAX )               # X-AXIS LIMITs ------------------------------------------------------------------------------- X-LIMITs

# #lt.gca().xaxis.set_major_locator(      matplotlib.ticker.FixedLocator(  secs ) )
# #lt.gca().xaxis.set_major_formatter(    matplotlib.ticker.FuncFormatter( lambda pos, _: time.strftime( "%d-%m-%Y %H:%M:%S", time.localtime( pos ) ) ) )

# plt.xaxis.set_major_locator(   AutoDateLocator() )
# plt.xaxis.set_major_formatter( DateFormatter( '%Y-%m-%d %H:%M' ) )  # ----------------------------------------------------------------------------------------- X-FORMAT

# #--------------------------------------------- # 90-deg x-tick-LABELs

# plt.setp( plt.gca().get_xticklabels(),  rotation            = 90,
#                             horizontalalignment = 'right'
#                             )

# #------------------------------------------------------------------

In [92]:
#Update on basis of tail and flightplans respectively            
query="SELECT * FROM flightplans_registration WHERE registration=? AND airline=? AND flightno=? AND timestamp>=? and timestamp <=?"

fp_regn_stmt=session.prepare(query)
fp_regn_stmt.consistency_level = ConsistencyLevel.LOCAL_ONE

rows_regn = session.execute(fp_regn_stmt, [regno,airline,flightno,fromdt,todt])
df_regn = rows_regn._current_rows 
df_regn_part=df_regn[['dest','departure','airline','flightno','received','id','registration','timestamp']]

df_regn_part

Unnamed: 0,dest,departure,airline,flightno,received,id,registration,timestamp
0,KORD,2019-10-13 04:25:00,UAL,2744,2019-10-13 04:25:45,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
1,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:23:36,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
2,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:26:23,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
3,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:29:44,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
4,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:31:24,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
5,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:37:37,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
6,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:51:26,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
7,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:51:27,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
8,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:51:28,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00
9,KORD,2019-10-13 04:25:49,UAL,2744,2019-10-13 06:17:28,UAL2744-1570921283-fa-0000,N57864,2019-10-13 04:40:00


In [85]:
query="SELECT * FROM flightplans WHERE airline=? AND flightno=? AND departure>=? AND departure<=? AND dest=? ALLOW FILTERING"

fp_dest_lookup_stmt=session.prepare(query)
fp_dest_lookup_stmt.consistency_level = ConsistencyLevel.LOCAL_ONE

rows = session.execute(fp_dest_lookup_stmt, [airline,flightno,fromdt,todt,dest])
df = rows._current_rows 
df

Unnamed: 0,airline,flightno,departure,received,altitude,arrival,dest,duration,equipment,id,orig,registration,speed,status,timestamp,waypoints
0,UAL,2744,2019-10-13 04:40:00,2019-10-12 23:01:23,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
1,UAL,2744,2019-10-13 04:40:00,2019-10-12 23:01:24,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
2,UAL,2744,2019-10-13 04:40:00,2019-10-12 23:04:16,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,,365,S,2019-10-13 04:40:00,
3,UAL,2744,2019-10-13 04:40:00,2019-10-12 23:05:09,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
4,UAL,2744,2019-10-13 04:40:00,2019-10-12 23:05:11,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
5,UAL,2744,2019-10-13 04:40:00,2019-10-13 00:45:08,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,S,2019-10-13 04:40:00,
6,UAL,2744,2019-10-13 04:40:00,2019-10-13 01:10:03,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
7,UAL,2744,2019-10-13 04:40:00,2019-10-13 01:39:09,0,2019-10-13 08:12:00,KSFO,212,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
8,UAL,2744,2019-10-13 04:40:00,2019-10-13 01:39:10,0,2019-10-13 08:09:00,KSFO,209,,UAL2744-1570921283-fa-0000,KSAT,N57864,365,F,2019-10-13 04:40:00,
9,UAL,2744,2019-10-13 04:40:00,2019-10-13 01:39:41,34000,2019-10-13 07:57:58,KSFO,197,B753,UAL2744-1570921283-fa-0000,KSAT,N57864,482,F,2019-10-13 04:40:00,"[{""lon"":-98.47,""lat"":29.53},{""lon"":-98.52,""lat..."


In [86]:
#Cancellation query
query="SELECT * FROM cancellation WHERE airline=? AND flightno=? AND timestamp >= ? AND timestamp <= ?"
fp_dest_lookup_stmt=session.prepare(query)
fp_dest_lookup_stmt.consistency_level = ConsistencyLevel.LOCAL_ONE

rows = session.execute(fp_dest_lookup_stmt, [airline,flightno,fromdt,todt])
df = rows._current_rows 
df


Unnamed: 0,airline,flightno,timestamp,departure,destination,id,origin
0,UAL,2744,2019-10-13 02:39:46,2019-10-13 04:40:00,KSFO,UAL2744-1570933018-fa-0000,KORD


In [141]:
pd.options.display.max_rows = 4000

import json
rawjson=df_dest[9:10]['waypoints'].astype(str)
aa=rawjson.to_string()

xx=aa.split('9    ')[1]
print(xx.to_string())
# parsed=json.loads(xx)

# print (json.dumps(parsed, indent=2, sort_keys=True))

AttributeError: 'str' object has no attribute 'to_string'

In [149]:
rawjson=df_dest[9:10]['waypoints'].apply(json.loads)
parsed=rawjson.to_json()
print(parsed)

{"9":[{"lon":-98.47,"lat":29.53},{"lon":-98.52,"lat":29.6},{"lon":-98.53,"lat":29.62},{"lon":-98.56,"lat":29.66},{"lon":-98.56,"lat":29.66},{"lon":-98.62,"lat":29.74},{"lon":-98.65,"lat":29.78},{"lon":-98.74,"lat":29.85},{"lon":-98.8,"lat":29.89},{"lon":-98.89,"lat":29.95},{"lon":-98.9,"lat":29.95},{"lon":-98.94,"lat":29.98},{"lon":-98.98,"lat":30.01},{"lon":-99.12,"lat":30.11},{"lon":-99.21,"lat":30.17},{"lon":-99.35,"lat":30.26},{"lon":-99.37,"lat":30.27},{"lon":-99.37,"lat":30.27},{"lon":-99.53,"lat":30.3},{"lon":-99.61,"lat":30.32},{"lon":-99.82,"lat":30.36},{"lon":-99.89,"lat":30.37},{"lon":-100.03,"lat":30.4},{"lon":-100.25,"lat":30.44},{"lon":-100.26,"lat":30.45},{"lon":-101.03,"lat":30.6},{"lon":-101.42,"lat":30.67},{"lon":-102.27,"lat":30.83},{"lon":-102.98,"lat":30.95},{"lon":-103.45,"lat":31.08},{"lon":-105.38,"lat":31.59},{"lon":-106.23,"lat":31.8},{"lon":-106.28,"lat":31.82},{"lon":-108.2,"lat":32.39},{"lon":-108.9,"lat":32.59},{"lon":-110,"lat":33},{"lon":-110.5,"lat":33.