<center><h1><b>Datasets Combination</b></h1></center>


In [1]:
import os
import pandas as pd

In [3]:
# Read the three per-category crime tables from the processed crime folder.
crime_folder = "Processed Crime Data"

burglary_df, robbery_df, theft_df = (
    pd.read_csv(os.path.join(crime_folder, csv_name))
    for csv_name in ("Burglary.csv", "Robbery.csv", "Theft.csv")
)

In [4]:
# Locate every weather CSV in the processed weather folder.
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# names are sorted to keep the row order of the combined weather table
# reproducible across machines and re-runs.
weather_folder = "Processed Weather Data"
weather_files = sorted(f for f in os.listdir(weather_folder) if f.endswith(".csv"))

In [5]:
# Read each weather CSV into its own DataFrame, preserving the file-list order.
weather_df_list = [
    pd.read_csv(os.path.join(weather_folder, csv_name))
    for csv_name in weather_files
]

In [6]:
# Stack the per-file weather frames row-wise and renumber the index from 0.
weather_df = pd.concat(objs=weather_df_list, axis=0, ignore_index=True)

In [7]:
# Report the (rows, columns) of every loaded table as a quick sanity check.
print("Crime data shapes:")
for crime_label, crime_frame in (
    ("Burglary:", burglary_df),
    ("Robbery:", robbery_df),
    ("Theft:", theft_df),
):
    print(crime_label, crime_frame.shape)

print("\nWeather data shape:", weather_df.shape)

Crime data shapes:
Burglary: (547, 15)
Robbery: (547, 15)
Theft: (547, 15)

Weather data shape: (7111, 10)


In [9]:
# Display the first five rows of the combined weather table.
weather_df.head(n=5)

Unnamed: 0,name,datetime,tempmax,tempmin,temp,humidity,precip,snow,windspeed,visibility
0,Camden,2019-01-01,11.1,6.5,8.8,74.1,0.0,0.0,11.0,21.9
1,Camden,2019-01-02,6.6,5.1,5.8,68.9,0.0,0.0,7.8,16.1
2,Camden,2019-01-03,5.6,4.0,5.1,66.5,0.0,0.0,5.1,19.9
3,Camden,2019-01-04,5.0,2.5,3.6,70.1,0.0,0.0,7.6,8.5
4,Camden,2019-01-05,6.1,4.0,5.1,74.0,0.0,0.0,8.9,11.2


In [10]:
# Remove the "Total Crimes" column from each crime table, so that the later
# wide-to-long reshape (which turns every non-Date column into a Borough
# value) does not treat it as a borough.
# errors="ignore" keeps this cell safe to re-run: each frame is reassigned to
# its own trimmed result, so a second execution would otherwise raise KeyError
# because the column is already gone.
burglary_df = burglary_df.drop(columns=["Total Crimes"], errors="ignore")
robbery_df = robbery_df.drop(columns=["Total Crimes"], errors="ignore")
theft_df = theft_df.drop(columns=["Total Crimes"], errors="ignore")

In [11]:
# Unpivot each wide crime table: one row per (Date, Borough), with the count
# stored in a column named after the crime category.
burglary_long, robbery_long, theft_long = (
    wide_frame.melt(id_vars="Date", var_name="Borough", value_name=crime_name)
    for wide_frame, crime_name in (
        (burglary_df, "Burglary"),
        (robbery_df, "Robbery"),
        (theft_df, "Theft"),
    )
)

In [12]:
# Join the three long-format crime tables on (Date, Borough) and add a total.
# validate="one_to_one" makes pandas raise MergeError if either side carries a
# repeated (Date, Borough) key, instead of silently multiplying rows.
crime_df = burglary_long.merge(
    robbery_long, on=["Date", "Borough"], validate="one_to_one"
)
crime_df = crime_df.merge(
    theft_long, on=["Date", "Borough"], validate="one_to_one"
)
# Total_Crime is the plain sum of the three category counts for each row.
crime_df["Total_Crime"] = crime_df["Burglary"] + crime_df["Robbery"] + crime_df["Theft"]

In [13]:
# Give the weather table the same key column names as the crime table.
weather_df = weather_df.rename(
    {"name": "Borough", "datetime": "Date"},
    axis="columns",
)

In [14]:
# Parse the Date column of both tables to datetime so the join keys compare equal.
for dated_frame in (crime_df, weather_df):
    dated_frame["Date"] = pd.to_datetime(dated_frame["Date"])

In [15]:
# Attach the weather readings to each crime row by matching (Date, Borough).
# This is an inner join (the pandas default, spelled out here), so rows
# without a partner on the other side are dropped.
# validate="one_to_one" makes pandas raise MergeError if either table has a
# repeated (Date, Borough) key, instead of silently duplicating rows.
final_df = crime_df.merge(
    weather_df,
    on=["Date", "Borough"],
    how="inner",
    validate="one_to_one",
)

In [16]:
# Display the first five rows of the merged crime/weather table.
final_df.head(n=5)

Unnamed: 0,Date,Borough,Burglary,Robbery,Theft,Total_Crime,tempmax,tempmin,temp,humidity,precip,snow,windspeed,visibility
0,2019-01-01,Camden,9,3,38,50,11.1,6.5,8.8,74.1,0.0,0.0,11.0,21.9
1,2019-01-02,Camden,3,4,39,46,6.6,5.1,5.8,68.9,0.0,0.0,7.8,16.1
2,2019-01-03,Camden,12,2,42,56,5.6,4.0,5.1,66.5,0.0,0.0,5.1,19.9
3,2019-01-04,Camden,16,7,38,61,5.0,2.5,3.6,70.1,0.0,0.0,7.6,8.5
4,2019-01-05,Camden,8,1,47,56,6.1,4.0,5.1,74.0,0.0,0.0,8.9,11.2


In [18]:
# Summarise the merged table: size, column names, covered dates, and boroughs.
merged_dates = final_df["Date"]
merged_boroughs = final_df["Borough"]

print("Final dataset shape:", final_df.shape)
print("Columns:", list(final_df.columns))
print("Date range:", merged_dates.min(), "to", merged_dates.max())
print("Number of boroughs:", merged_boroughs.nunique())
print("Sample boroughs:", merged_boroughs.unique()[:5])

Final dataset shape: (7111, 14)
Columns: ['Date', 'Borough', 'Burglary', 'Robbery', 'Theft', 'Total_Crime', 'tempmax', 'tempmin', 'temp', 'humidity', 'precip', 'snow', 'windspeed', 'visibility']
Date range: 2019-01-01 00:00:00 to 2020-06-30 00:00:00
Number of boroughs: 13
Sample boroughs: ['Camden' 'City of London' 'Greenwich' 'Hackney' 'Hammersmith and Fulham']


In [17]:
# Write the merged table to CSV in the working directory, omitting the row index.
final_df.to_csv(path_or_buf="Crime_Weather_Dataset.csv", index=False)