Import data from US_Accidents_March23_1_percent.csv, clean it, and export the results to new CSV files.  
Making one file where all rows containing NaN values are deleted: US_Accidents_March23_1_percent_cleaned.csv  
Making another file where all NaN values are converted to 0: One_Percent_Null_To_Zero.csv  
We can use US_Accidents_March23_1_percent_cleaned.csv for our graphs and One_Percent_Null_To_Zero.csv if we want all data.  
Most of the NaN values were found in latitude and longitude cells, so it's OK if we delete those records.  

In [9]:
# Import Dependencies
import os
import pandas as pd
from pathlib import Path


In [10]:
# Load the accidents CSV from the data folder into a DataFrame and preview it
US_Accidents_path = Path("data/US_Accidents_March23_1_percent.csv")
US_Accidents_df = pd.read_csv(filepath_or_buffer=US_Accidents_path)
US_Accidents_df.head(5)

Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-100,Source2,2,2016-02-11 08:13:24,2016-02-11 08:43:24,39.749916,-84.139359,,,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day
1,A-200,Source2,2,2016-02-16 11:10:17,2016-02-16 11:40:17,39.79166,-84.169342,,,0.0,...,False,False,False,False,False,False,Day,Day,Day,Day
2,A-300,Source2,2,2016-02-19 16:16:51,2016-02-19 17:01:51,39.765259,-84.137802,,,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day
3,A-400,Source2,3,2016-02-26 16:15:10,2016-02-26 16:45:10,39.91468,-83.016907,,,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day
4,A-500,Source2,2,2016-03-04 14:55:55,2016-03-04 15:40:55,39.747753,-84.205582,,,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day


In [17]:
# Export US_Accidents_March23_1_percent as a JSON file.
# Source CSV and destination JSON locations
csv_file_path = 'data/US_Accidents_March23_1_percent.csv'
json_file_path = 'data/US_Accidents_March23_1_percent_JSON.json'

# Read the CSV from disk, then write it out as line-delimited JSON:
# orient='records' together with lines=True emits one JSON object per row.
df = pd.read_csv(csv_file_path)
df.to_json(path_or_buf=json_file_path, orient='records', lines=True)

In [11]:
# List every column of the loaded frame together with its non-null count and dtype;
# columns whose non-null count is below the row count contain NaN values.
US_Accidents_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77283 entries, 0 to 77282
Data columns (total 46 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   ID                     77283 non-null  object 
 1   Source                 77283 non-null  object 
 2   Severity               77283 non-null  int64  
 3   Start_Time             77283 non-null  object 
 4   End_Time               77283 non-null  object 
 5   Start_Lat              77283 non-null  float64
 6   Start_Lng              77283 non-null  float64
 7   End_Lat                43256 non-null  float64
 8   End_Lng                43256 non-null  float64
 9   Distance(mi)           77283 non-null  float64
 10  Description            77283 non-null  object 
 11  Street                 77183 non-null  object 
 12  City                   77280 non-null  object 
 13  County                 77283 non-null  object 
 14  State                  77283 non-null  object 
 15  Zi

In [7]:
# Remove the columns that are not needed.
# Columns removed: Source, End_Lat, End_Lng, Distance(mi), Country,
# Weather_Timestamp, Civil_Twilight, Nautical_Twilight, Astronomical_Twilight
columns_to_delete = [
    'Source',
    'End_Lat',
    'End_Lng',
    'Distance(mi)',
    'Country',
    'Weather_Timestamp',
    'Civil_Twilight',
    'Nautical_Twilight',
    'Astronomical_Twilight',
]

# Work on a copy so US_Accidents_df itself keeps all of its columns
US_Accidents_df_copy = US_Accidents_df.copy().drop(columns=columns_to_delete)
US_Accidents_df_copy.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,Description,Street,City,County,...,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset
0,A-100,2,2016-02-11 08:13:24,2016-02-11 08:43:24,39.749916,-84.139359,Accident on US-35 Westbound at Smithville Rd. ...,US-35 E,Dayton,Montgomery,...,False,False,False,False,False,False,False,False,False,Day
1,A-200,2,2016-02-16 11:10:17,2016-02-16 11:40:17,39.79166,-84.169342,Accident on Kuntz Rd at Janney Rd. Expect delays.,Kuntz Rd,Dayton,Montgomery,...,False,False,False,False,False,False,False,False,False,Day
2,A-300,2,2016-02-19 16:16:51,2016-02-19 17:01:51,39.765259,-84.137802,Accident on Smithville Rd at 3rd St.,Smithville Rd,Dayton,Montgomery,...,False,False,False,False,False,False,False,True,False,Day
3,A-400,3,2016-02-26 16:15:10,2016-02-26 16:45:10,39.91468,-83.016907,Accident on I-71 at Exit 104 OH-104 Frank Rd.,I-71 S,Columbus,Franklin,...,False,False,False,False,False,False,False,False,False,Day
4,A-500,2,2016-03-04 14:55:55,2016-03-04 15:40:55,39.747753,-84.205582,Accident on US-35 at I-75.,I-75 S,Dayton,Montgomery,...,False,False,False,False,False,False,False,False,False,Day


In [12]:
# Keep only the rows that have no NaN in any of the remaining columns
US_Accidents_cleaned = US_Accidents_df_copy.dropna(axis=0, how='any')
US_Accidents_cleaned.head(5)

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,Description,Street,City,County,...,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset
92,A-9300,2,2017-01-02 19:58:45,2017-01-02 20:28:10,39.321247,-120.830399,Multi-vehicle accident on CA-20 Eastbound at J...,State Highway 20,Nevada City,Nevada,...,False,False,False,False,False,False,False,False,False,Night
118,A-11900,2,2017-01-19 21:54:34,2017-01-19 22:38:46,39.321449,-120.849106,Accident on CA-20 Eastbound in Nevada City.,State Highway 20,Nevada City,Nevada,...,False,False,False,False,False,False,False,False,False,Night
448,A-44900,3,2016-06-28 09:26:50,2016-06-28 10:10:00,34.414745,-118.579155,#2 lane blocked and left hand shoulder blocked...,Golden State Fwy S,Valencia,Los Angeles,...,False,False,False,False,False,False,False,False,False,Day
462,A-46300,3,2016-07-05 00:10:32,2016-07-05 00:55:32,34.499092,-118.62635,Accident on I-5 Southbound at Exits 176 176B T...,Golden State Fwy S,Castaic,Los Angeles,...,True,False,False,False,False,False,False,False,False,Night
465,A-46600,2,2016-07-05 20:36:05,2016-07-05 21:21:05,34.352234,-118.548767,Accident and fallen power cables on The Old Rd...,The Old Rd,Newhall,Los Angeles,...,False,False,False,False,False,False,False,False,False,Night


In [13]:
# Summarize the NaN-free frame: remaining row count, columns, non-null counts and dtypes
US_Accidents_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 52141 entries, 92 to 77282
Data columns (total 37 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 52141 non-null  object 
 1   Severity           52141 non-null  int64  
 2   Start_Time         52141 non-null  object 
 3   End_Time           52141 non-null  object 
 4   Start_Lat          52141 non-null  float64
 5   Start_Lng          52141 non-null  float64
 6   Description        52141 non-null  object 
 7   Street             52141 non-null  object 
 8   City               52141 non-null  object 
 9   County             52141 non-null  object 
 10  State              52141 non-null  object 
 11  Zipcode            52141 non-null  object 
 12  Timezone           52141 non-null  object 
 13  Airport_Code       52141 non-null  object 
 14  Temperature(F)     52141 non-null  float64
 15  Wind_Chill(F)      52141 non-null  float64
 16  Humidity(%)        52141 n

In [14]:
# Save the NaN-free frame as a new CSV file; index=False leaves the row index out of the file
US_Accidents_cleaned.to_csv(
    path_or_buf="data/US_Accidents_March23_1_percent_cleaned.csv",
    index=False,
)

In [15]:
# Export the cleaned data as a JSON file.
# Source CSV and destination JSON locations
csv_file_path = 'data/US_Accidents_March23_1_percent_cleaned.csv'
json_file_path = 'data/US_Accidents_March23_1_percent_cleaned.json'

# Read the cleaned CSV back from disk, then write it out as line-delimited JSON:
# orient='records' together with lines=True emits one JSON object per row.
df = pd.read_csv(csv_file_path)
df.to_json(path_or_buf=json_file_path, orient='records', lines=True)

In [None]:
# Build a second data set in which every NaN is replaced by 0 instead of dropping the row
# (dropping NaN removes the whole row; this variant keeps all rows and all columns).
# Don't use this for maps, as missing coordinates become [0,0].
# Note that fillna(0) is applied to every column, so missing text values also become 0.
US_Accidents_Null_To_Zero = US_Accidents_df.copy().fillna(value=0)
US_Accidents_Null_To_Zero.head(5)


Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-100,Source2,2,2016-02-11 08:13:24,2016-02-11 08:43:24,39.749916,-84.139359,0.0,0.0,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day
1,A-200,Source2,2,2016-02-16 11:10:17,2016-02-16 11:40:17,39.79166,-84.169342,0.0,0.0,0.0,...,False,False,False,False,False,False,Day,Day,Day,Day
2,A-300,Source2,2,2016-02-19 16:16:51,2016-02-19 17:01:51,39.765259,-84.137802,0.0,0.0,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day
3,A-400,Source2,3,2016-02-26 16:15:10,2016-02-26 16:45:10,39.91468,-83.016907,0.0,0.0,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day
4,A-500,Source2,2,2016-03-04 14:55:55,2016-03-04 15:40:55,39.747753,-84.205582,0.0,0.0,0.01,...,False,False,False,False,False,False,Day,Day,Day,Day


In [25]:
# Summarize the zero-filled frame; every column should now report the full row count as non-null
US_Accidents_Null_To_Zero.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77283 entries, 0 to 77282
Data columns (total 46 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   ID                     77283 non-null  object 
 1   Source                 77283 non-null  object 
 2   Severity               77283 non-null  int64  
 3   Start_Time             77283 non-null  object 
 4   End_Time               77283 non-null  object 
 5   Start_Lat              77283 non-null  float64
 6   Start_Lng              77283 non-null  float64
 7   End_Lat                77283 non-null  float64
 8   End_Lng                77283 non-null  float64
 9   Distance(mi)           77283 non-null  float64
 10  Description            77283 non-null  object 
 11  Street                 77283 non-null  object 
 12  City                   77283 non-null  object 
 13  County                 77283 non-null  object 
 14  State                  77283 non-null  object 
 15  Zi

In [26]:
# Save the zero-filled (NaN -> 0) frame as a new CSV file; index=False leaves the row index out
US_Accidents_Null_To_Zero.to_csv(
    path_or_buf="data/One_Percent_Null_To_Zero.csv",
    index=False,
)

In [16]:
# Create a new JSON file for the NaN-to-zero data
# Load the CSV file. The name must match the file written by to_csv exactly
# ("One_Percent", capital P): the previous lowercase spelling raises
# FileNotFoundError on case-sensitive file systems.
csv_file_path = 'data/One_Percent_Null_To_Zero.csv'
df = pd.read_csv(csv_file_path)

# Convert to line-delimited JSON (orient='records' with lines=True writes one
# JSON object per row). The output file name is intentionally left unchanged
# so anything already reading it keeps working.
json_file_path = 'data/One_percent_Null_To_Zero_JSON.json'
df.to_json(json_file_path, orient='records', lines=True)