In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
import sqlite3
import itertools

In [3]:
df = pd.read_csv('data/Crash_Reporting_-_Drivers_Data.csv', low_memory=False)

cols_to_drop = ['Report Number', 'Local Case Number','Latitude','Longitude',
                'Off-Road Description', 'Municipality','Related Non-Motorist',
                'Non-Motorist Substance Abuse', 'Circumstance']

df.drop(cols_to_drop, axis=1, inplace=True)

In [4]:
col_datatypes = {}

for i in df.columns:
    col_datatypes[i] = type(i)

In [5]:
d = {'Agency Name': str,
 'ACRS Report Type': str,
 'Crash Date/Time': 'datetime64[ns]',
 'Route Type': str,
 'Road Name': str,
 'Cross-Street Type': str,
 'Cross-Street Name': str,
 'Collision Type': str,
 'Weather': str,
 'Surface Condition': str,
 'Light': str,
 'Traffic Control': str,
 'Driver Substance Abuse': str,
 'Person ID': 'ignore',
 'Driver At Fault': str,
 'Injury Severity': str,
 'Driver Distracted By': str,
 'Drivers License State': str,
 'Vehicle ID': 'ignore',
 'Vehicle Damage Extent': str,
 'Vehicle First Impact Location': str,
 'Vehicle Second Impact Location': str,
 'Vehicle Body Type': str,
 'Vehicle Movement': str,
 'Vehicle Continuing Dir': str,
 'Vehicle Going Dir': str,
 'Speed Limit': float,
 'Driverless Vehicle': 'ignore',
 'Parked Vehicle': 'ignore',
 'Vehicle Year': float,
 'Vehicle Make': str,
 'Vehicle Model': str,
 'Equipment Problems': 'ignore',
 'Location': str}

In [6]:
for key, values in d.items():
    if values != 'ignore':
        df[key] = df[key].astype(values)
    else:
        df.drop(key, axis=1, inplace=True)

df['Speed_Limit'] = df['Speed Limit']
df['Crash Date'] = df['Crash Date/Time'].dt.date
df['Crash Time'] = df['Crash Date/Time'].dt.time
df.drop('Crash Date/Time', axis=1, inplace=True)

### SQL Queries

In [7]:
# Connect to an in-memory SQLite database
conn = sqlite3.connect(':memory:')

# Insert the Pandas DataFrame into the SQLite database
df.to_sql('crash_data', conn, index=False)

170968

In [8]:
query = "SELECT Speed_Limit from crash_data limit 10"
result = pd.read_sql_query(query, conn)
print(result)

   Speed_Limit
0         15.0
1         40.0
2         35.0
3         40.0
4         35.0
5         30.0
6         25.0
7         35.0
8         35.0
9         30.0


### 1. What is the sum, average, count of a given column

In [9]:
# what is the average speed limit
col = "Speed_Limit"
query = f"SELECT avg({col}) as average_speed from crash_data"
pd.read_sql_query(query,conn)

Unnamed: 0,average_speed
0,32.551881


In [10]:
col = "Speed_Limit"
query = f"SELECT max({col}) as max_speed, min({col}) as min_speed from crash_data"
pd.read_sql_query(query,conn)

Unnamed: 0,max_speed,min_speed
0,75.0,0.0


In [11]:
num_functions = ['avg', 'min', 'max', 'sum']

for func in num_functions:
    for col in df.select_dtypes(include=['float','int']).columns:
        print(f"what is the {func} {col}")

what is the avg Speed Limit
what is the avg Vehicle Year
what is the avg Speed_Limit
what is the min Speed Limit
what is the min Vehicle Year
what is the min Speed_Limit
what is the max Speed Limit
what is the max Vehicle Year
what is the max Speed_Limit
what is the sum Speed Limit
what is the sum Vehicle Year
what is the sum Speed_Limit


In [27]:
pd.to_datetime(df["Crash Time"])

TypeError: <class 'datetime.time'> is not convertible to datetime

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167799 entries, 0 to 167798
Data columns (total 31 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   Agency Name                     167799 non-null  object 
 1   ACRS Report Type                167799 non-null  object 
 2   Route Type                      167799 non-null  object 
 3   Road Name                       167799 non-null  object 
 4   Cross-Street Type               167799 non-null  object 
 5   Cross-Street Name               167799 non-null  object 
 6   Collision Type                  167799 non-null  object 
 7   Weather                         167799 non-null  object 
 8   Surface Condition               167799 non-null  object 
 9   Light                           167799 non-null  object 
 10  Traffic Control                 167799 non-null  object 
 11  Driver Substance Abuse          167799 non-null  object 
 12  Driver At Fault 

In [20]:
cat_cols = 
num_cols = 
date_cols = 
bin_cols = 

0         Yes
1         Yes
2         Yes
3          No
4         Yes
         ... 
167794     No
167795    Yes
167796     No
167797     No
167798    Yes
Name: Driver At Fault, Length: 167799, dtype: object

In [None]:
num_cols = ['speed','year','cost']

combinations = list(itertools.combinations(num_cols, 2))
combinations

In [None]:
all_perm = list(itertools.permutations(num_functions, 2))
# all_perm.extend([(i,i) for i in num_functions])

In [None]:
all_perm

In [None]:
for f1, f2 in all_perm:
    for c1, c2 in combinations:
#         print(f"what is {f1} and {f2} {c1}")
#         print(f"what is {f1} and {f2} {c2}")
#         print(f"what is {f1} {c2} and {f2} {c1}")
#         print(f"what is {f1} {c1} and {f2} {c2}")
        print(f"{f1}{f2}{c1}")
        print(f"{f1}{f2}{c2}")
        print(f"{f1}{c2}{f2}{c1}")
        print(f"{f1}{c1}{f2}{c2}")

In [None]:
what is avg and min speed
what is avg and min year
what is avg year and min speed
what is avg speed and min year
what is avg and min speed
what is avg and min cost
what is avg cost and min speed
what is avg speed and min cost
what is avg and min year
what is avg and min cost
what is avg cost and min year
what is avg year and min cost
what is avg and max speed
what is avg and max year
what is avg year and max speed
what is avg speed and max year
what is avg and max speed
what is avg and max cost
what is avg cost and max speed
what is avg speed and max cost
what is avg and max year
what is avg and max cost
what is avg cost and max year
what is avg year and max cost
what is avg and sum speed
what is avg and sum year
what is avg year and sum speed
what is avg speed and sum year
what is avg and sum speed
what is avg and sum cost
what is avg cost and sum speed
what is avg speed and sum cost
what is avg and sum year
what is avg and sum cost
what is avg cost and sum year
what is avg year and sum cost
what is min and avg speed
what is min and avg year
what is min year and avg speed
what is min speed and avg year
what is min and avg speed
what is min and avg cost
what is min cost and avg speed
what is min speed and avg cost
what is min and avg year
what is min and avg cost
what is min cost and avg year
what is min year and avg cost
what is min and max speed
what is min and max year
what is min year and max speed
what is min speed and max year
what is min and max speed
what is min and max cost
what is min cost and max speed
what is min speed and max cost
what is min and max year
what is min and max cost
what is min cost and max year
what is min year and max cost
what is min and sum speed
what is min and sum year
what is min year and sum speed
what is min speed and sum year
what is min and sum speed
what is min and sum cost
what is min cost and sum speed
what is min speed and sum cost
what is min and sum year
what is min and sum cost
what is min cost and sum year
what is min year and sum cost
what is max and avg speed
what is max and avg year
what is max year and avg speed
what is max speed and avg year
what is max and avg speed
what is max and avg cost
what is max cost and avg speed
what is max speed and avg cost
what is max and avg year
what is max and avg cost
what is max cost and avg year
what is max year and avg cost
what is max and min speed
what is max and min year
what is max year and min speed
what is max speed and min year
what is max and min speed
what is max and min cost
what is max cost and min speed
what is max speed and min cost
what is max and min year
what is max and min cost
what is max cost and min year
what is max year and min cost
what is max and sum speed
what is max and sum year
what is max year and sum speed
what is max speed and sum year
what is max and sum speed
what is max and sum cost
what is max cost and sum speed
what is max speed and sum cost
what is max and sum year
what is max and sum cost
what is max cost and sum year
what is max year and sum cost
what is sum and avg speed
what is sum and avg year
what is sum year and avg speed
what is sum speed and avg year
what is sum and avg speed
what is sum and avg cost
what is sum cost and avg speed
what is sum speed and avg cost
what is sum and avg year
what is sum and avg cost
what is sum cost and avg year
what is sum year and avg cost
what is sum and min speed
what is sum and min year
what is sum year and min speed
what is sum speed and min year
what is sum and min speed
what is sum and min cost
what is sum cost and min speed
what is sum speed and min cost
what is sum and min year
what is sum and min cost
what is sum cost and min year
what is sum year and min cost
what is sum and max speed
what is sum and max year
what is sum year and max speed
what is sum speed and max year
what is sum and max speed
what is sum and max cost
what is sum cost and max speed
what is sum speed and max cost
what is sum and max year
what is sum and max cost
what is sum cost and max year
what is sum year and max cost
what is avg and avg speed
what is avg and avg year
what is avg year and avg speed
what is avg speed and avg year
what is avg and avg speed
what is avg and avg cost
what is avg cost and avg speed
what is avg speed and avg cost
what is avg and avg year
what is avg and avg cost
what is avg cost and avg year
what is avg year and avg cost
what is min and min speed
what is min and min year
what is min year and min speed
what is min speed and min year
what is min and min speed
what is min and min cost
what is min cost and min speed
what is min speed and min cost
what is min and min year
what is min and min cost
what is min cost and min year
what is min year and min cost
what is max and max speed
what is max and max year
what is max year and max speed
what is max speed and max year
what is max and max speed
what is max and max cost
what is max cost and max speed
what is max speed and max cost
what is max and max year
what is max and max cost
what is max cost and max year
what is max year and max cost
what is sum and sum speed
what is sum and sum year
what is sum year and sum speed
what is sum speed and sum year
what is sum and sum speed
what is sum and sum cost
what is sum cost and sum speed
what is sum speed and sum cost
what is sum and sum year
what is sum and sum cost
what is sum cost and sum year
what is sum year and sum cost
​