In [43]:
# dependencies
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pylab
import json

# Question 1

### Explore Raw Data

In [44]:
# Directory the kernel is running in; data.csv is expected to sit here.
# os.getcwd() replaces the `!pwd` shell escape, which is IPython-only syntax
# and depends on a `pwd` command being available on the host.
filedir = os.getcwd()

# load the raw measurements into a DataFrame
csv_path = os.path.join(filedir, "data.csv")
df = pd.read_csv(csv_path)

In [45]:
# view dimensions of the raw data as a (rows, columns) tuple
df.shape

(6454, 6)

In [46]:
# view the first rows of the raw data to check column names and value formats
df.head()

Unnamed: 0,Carrier,Activity,Data_Direction,LTE_Signal_Strength,Final_Test_Speed,Network_Types
0,A,Drive,Download,-86.666667,54111.31316,LTE
1,A,Drive,Download,-88.0,8316.654272,LTE
2,A,Drive,Download,-89.333333,8483.993072,LTE
3,A,Drive,Download,-61.0,39363.95864,LTE
4,A,Drive,Download,-88.0,6098.378905,LTE


In [47]:
# view the last rows of the raw data
df.tail()

Unnamed: 0,Carrier,Activity,Data_Direction,LTE_Signal_Strength,Final_Test_Speed,Network_Types
6449,D,Outdoor,Upload,-85,41758.99756,LTE
6450,D,Outdoor,Upload,-85,40584.11999,LTE
6451,D,Outdoor,Upload,-85,40899.93868,LTE
6452,D,Outdoor,Upload,-85,41345.76277,LTE
6453,D,Outdoor,Upload,-85,41335.79651,LTE


In [48]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns; a count below the row count means the column has missing values
df.describe()

Unnamed: 0,LTE_Signal_Strength,Final_Test_Speed
count,6190.0,6373.0
mean,-94.001912,14176.271929
std,11.239601,16324.441748
min,-121.666667,3.639271
25%,-102.0,3384.522434
50%,-95.0,8826.213484
75%,-87.0,18458.38472
max,-54.0,117017.4722


### Filter & Format Data

In [49]:
# get only requested data: LTE download tests recorded while driving
is_requested = (
    (df['Activity'] == 'Drive')
    & (df['Network_Types'] == 'LTE')
    & (df['Data_Direction'] == 'Download')
)

# .copy() gives df_filt its own data, so the unit conversion below is an
# ordinary column assignment instead of a write through a selection of df
# (the original in-place `/=` on the selection raised SettingWithCopyWarning)
df_filt = df.loc[is_requested].copy()

# convert speed from kilobits to megabits
df_filt['Final_Test_Speed'] = df_filt['Final_Test_Speed'] / 1000

# Carrier B
df_filt_B = df_filt[df_filt['Carrier'] == 'B']

# Carrier C
df_filt_C = df_filt[df_filt['Carrier'] == 'C']



### View Filtered Data

In [50]:
# view the first rows of the filtered Carrier B data
df_filt_B.head()

Unnamed: 0,Carrier,Activity,Data_Direction,LTE_Signal_Strength,Final_Test_Speed,Network_Types
1616,B,Drive,Download,-99.666667,4.153838,LTE
1617,B,Drive,Download,-98.333333,2.348823,LTE
1618,B,Drive,Download,-106.0,1.930463,LTE
1619,B,Drive,Download,-101.333333,4.303711,LTE
1620,B,Drive,Download,-82.0,5.983975,LTE


In [51]:
# view the first rows of the filtered Carrier C data
df_filt_C.head()

Unnamed: 0,Carrier,Activity,Data_Direction,LTE_Signal_Strength,Final_Test_Speed,Network_Types
3231,C,Drive,Download,-78.666667,45.887569,LTE
3232,C,Drive,Download,-102.333333,6.91199,LTE
3233,C,Drive,Download,-104.5,4.165152,LTE
3234,C,Drive,Download,-85.5,13.613573,LTE
3235,C,Drive,Download,-92.333333,22.843607,LTE


### Plot Requested Data

In [52]:
# Scatter download speed against LTE signal strength for carriers C and B.
# Each series is labelled so the legend identifies the red and green markers;
# without it the two carriers cannot be told apart from the figure alone.

# plot Carrier C strength vs speed (red circles)
xC = df_filt_C['LTE_Signal_Strength'].tolist()
yC = df_filt_C['Final_Test_Speed'].tolist()
plt.plot(xC, yC, 'ro', label='Carrier C')

# plot Carrier B strength vs speed (green circles)
xB = df_filt_B['LTE_Signal_Strength'].tolist()
yB = df_filt_B['Final_Test_Speed'].tolist()
plt.plot(xB, yB, 'go', label='Carrier B')

# format plot
plt.title('Download Speed vs LTE Strength')
plt.xlabel('LTE Signal Strength')
plt.ylabel('Final Test Speed (megabits)')
plt.legend()

# show plot
plt.show()

# Question 2

### Convert Data to JSON

In [53]:
# Build one entry per carrier, shaped as
#   {"Carrier": <name>, "plots": [{"Activity", "Data_Direction", "Final_Test_Speed"}, ...]}
# and serialise the list of entries to a JSON string.


def _json_safe(value):
    """Return None for a missing value (NaN/None), otherwise the value unchanged.

    json.dumps writes float NaN as the bare token NaN, which is not valid
    JSON; None is written as null instead.
    """
    return None if pd.isna(value) else value


# get set of carrier names (rows with a missing carrier are left out)
carriers = set(df['Carrier'].dropna().tolist())

# create json data structure
datastruct = []

# sorted() fixes the carrier order; iterating the set directly can yield a
# different order from one kernel session to the next
for carrier in sorted(carriers):

    # get carrier specific data from raw data
    df_carrier = df[df['Carrier'] == carrier]

    # one dict per test, built column-wise instead of via the slower iterrows()
    plots = [
        {
            "Activity": _json_safe(activity),
            "Data_Direction": _json_safe(direction),
            "Final_Test_Speed": _json_safe(speed),
        }
        for activity, direction, speed in zip(
            df_carrier["Activity"],
            df_carrier["Data_Direction"],
            df_carrier["Final_Test_Speed"],
        )
    ]

    # save carrier data to structure
    datastruct.append({
        "Carrier": carrier,
        "plots": plots
    })


# convert data structure to json
jsondump = json.dumps(datastruct, sort_keys=True, indent=4)
print(jsondump)

[
    {
        "Carrier": "A", 
        "plots": [
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test_Speed": 54111.313160000005
            }, 
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test_Speed": 8316.654272
            }, 
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test_Speed": 8483.993072
            }, 
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test_Speed": 39363.95864
            }, 
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test_Speed": 6098.378905
            }, 
            {
                "Activity": "Drive", 
                "Data_Direction": "Download", 
                "Final_Test