In [None]:
import pandas as pd

In [19]:
# Load the LAEI 2013 emissions workbooks and the per-year concentration CSVs,
# join each pollutant's concentrations to its emissions on grid coordinates and
# year, and write one merged CSV per pollutant.
#
# The three pollutants (NOx, PM2.5, PM10) go through exactly the same steps, so
# the shared logic lives in the helpers below and only the file names, sheet
# names and target column names differ between them.

# Columns that must be present, with identical values, in both tables to match.
_MERGE_KEYS = ['Easting', 'Northing', 'Year']


def _load_emissions(workbook_path, sheet_name):
    """Read one emissions sheet and normalise its column names.

    Args:
        workbook_path: Path of the emissions summary ``.xlsx`` workbook.
        sheet_name: Name of the sheet to read from that workbook.

    Returns:
        A DataFrame of the sheet with the first three rows skipped and every
        space in a column name replaced by an underscore.
    """
    emissions = pd.read_excel(workbook_path, sheet_name=sheet_name, skiprows=3)
    emissions.columns = emissions.columns.str.replace(' ', '_')
    return emissions


def _stack_concentrations(yearly_frames, concentration_column):
    """Concatenate per-year concentration frames and rename their columns.

    Args:
        yearly_frames: List of DataFrames, one per modelled year.
        concentration_column: New name for the ``conct`` column.

    Returns:
        A single DataFrame with a fresh 0..n-1 index in which ``conct``, ``x``,
        ``y`` and ``year`` are renamed to ``concentration_column``,
        ``Easting``, ``Northing`` and ``Year``.
    """
    stacked = pd.concat(yearly_frames, ignore_index=True)
    # NOTE(review): rename() silently ignores keys that are absent, so this
    # assumes every CSV really has 'conct', 'x', 'y' and 'year' columns --
    # confirm against the data files.
    return stacked.rename(columns={
        'conct': concentration_column,
        'x': 'Easting',
        'y': 'Northing',
        'year': 'Year',
    })


def _merge_on_grid_and_year(concentrations, emissions):
    """Inner-join concentrations with emissions on Easting, Northing and Year.

    Rows without an exact match on all three keys in the other table are
    dropped by the inner join.
    """
    return pd.merge(concentrations, emissions, on=_MERGE_KEYS, how='inner')


# --- Emissions (one Excel workbook per pollutant) ---------------------------
nox_emissions_df = _load_emissions('../data//LAEI2013_Emissions_Summary-NOx_v1.1.xlsx',
                                   'NOx by Grid Exact Cut')
pm25_emissions_df = _load_emissions('../data//LAEI2013_Emissions_Summary-PM2.5_v1.1.xlsx',
                                    'PM2.5 by Grid Exact Cut')
pm10_emissions_df = _load_emissions('../data//LAEI2013_Emissions_Summary-PM10_v1.1.xlsx',
                                    'PM10 by Grid Exact Cut')

# --- Concentrations (one CSV per pollutant and year) ------------------------
# The per-year frames keep their own names so they can still be inspected
# individually after this cell has run.
nox_concentrations2013_df = pd.read_csv('../data/PostLAEI2013_2013_NOx.csv')
nox_concentrations2020_df = pd.read_csv('../data/2020_Met2013_LAEI2013_NOx.csv')
nox_concentrations2025_df = pd.read_csv('../data/2025_Met2013_LAEI2013_NOx.csv')
nox_concentrations2030_df = pd.read_csv('../data/2030_Met2013_LAEI2013_NOx.csv')

pm25_concentrations2013_df = pd.read_csv('../data/PostLAEI2013_2013_PM25.csv')
pm25_concentrations2020_df = pd.read_csv('../data/2020_Met2013_LAEI2013_PM25.csv')
pm25_concentrations2025_df = pd.read_csv('../data/2025_Met2013_LAEI2013_PM25.csv')
pm25_concentrations2030_df = pd.read_csv('../data/2030_Met2013_LAEI2013_PM25.csv')

pm10_concentrations2013_df = pd.read_csv('../data/PostLAEI2013_2013_PM10.csv')
pm10_concentrations2020_df = pd.read_csv('../data/2020_Met2013_LAEI2013_PM10.csv')
pm10_concentrations2025_df = pd.read_csv('../data/2025_Met2013_LAEI2013_PM10.csv')
pm10_concentrations2030_df = pd.read_csv('../data/2030_Met2013_LAEI2013_PM10.csv')

# --- Stack the years and apply the shared column names ----------------------
nox_concentrations_df = _stack_concentrations(
    [nox_concentrations2013_df,
     nox_concentrations2020_df,
     nox_concentrations2025_df,
     nox_concentrations2030_df],
    'NOx_concentration')

pm25_concentrations_df = _stack_concentrations(
    [pm25_concentrations2013_df,
     pm25_concentrations2020_df,
     pm25_concentrations2025_df,
     pm25_concentrations2030_df],
    'PM25_concentration')

pm10_concentrations_df = _stack_concentrations(
    [pm10_concentrations2013_df,
     pm10_concentrations2020_df,
     pm10_concentrations2025_df,
     pm10_concentrations2030_df],
    'PM10_concentration')

# --- Join concentrations to emissions ---------------------------------------
nox_merged_df = _merge_on_grid_and_year(nox_concentrations_df, nox_emissions_df)
pm25_merged_df = _merge_on_grid_and_year(pm25_concentrations_df, pm25_emissions_df)
pm10_merged_df = _merge_on_grid_and_year(pm10_concentrations_df, pm10_emissions_df)

# --- Persist the merged tables ----------------------------------------------
# to_csv() is called with its defaults, so the row index is written as the
# first (unnamed) column of each file.
nox_merged_df.to_csv("../data/nox_merged_output.csv")
pm25_merged_df.to_csv("../data/pm25_merged_output.csv")
pm10_merged_df.to_csv("../data/pm10_merged_output.csv")

In [20]:
### Training the models
#
# Fits one ordinary least-squares LinearRegression per pollutant on the merged
# concentration/emissions table and prints the mean squared error measured on
# the held-out test split. The same steps are applied to NOx, PM2.5 and PM10,
# so they are implemented once in the helpers below.

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fraction of rows held out for evaluation, and the seed that makes the
# train/test split reproducible.
test_size = 0.2
random_state = 42


def _select_features_and_target(merged_df, target_column):
    """Split a merged table into numeric feature columns and the target.

    Args:
        merged_df: Merged concentration/emissions DataFrame.
        target_column: Name of the concentration column to predict. It must
            have dtype float64 or int64, otherwise the drop below raises
            ``KeyError``.

    Returns:
        Tuple ``(numerical_columns, features, target)`` where
        ``numerical_columns`` is the Index of all float64/int64 columns,
        ``features`` is those columns minus ``target_column`` and ``target``
        is the ``target_column`` Series.
    """
    numerical_columns = merged_df.select_dtypes(include=['float64', 'int64']).columns
    # NOTE(review): every remaining float64/int64 column becomes a feature;
    # that presumably includes the merge keys and any numeric ID columns --
    # confirm this is intended.
    features = merged_df[numerical_columns].drop(columns=[target_column])
    target = merged_df[target_column]
    return numerical_columns, features, target


def _fit_and_report(features, target, label, test_size, random_state):
    """Train a LinearRegression on a random split and print its test MSE.

    Args:
        features: Feature DataFrame.
        target: Target Series aligned with ``features``.
        label: Pollutant label used as the prefix of the printed message.
        test_size: Fraction of rows placed in the test split.
        random_state: Seed for the split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, model, y_pred, mse)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f'{label} Concentrations Model Mean Squared Error: {mse}')
    return X_train, X_test, y_train, y_test, model, y_pred, mse


# NOx Concentrations Model
nox_numerical_columns, nox_features, nox_target = _select_features_and_target(
    nox_merged_df, 'NOx_concentration')
(nox_X_train, nox_X_test, nox_y_train, nox_y_test,
 nox_model, nox_y_pred, mse) = _fit_and_report(
    nox_features, nox_target, 'NOx', test_size, random_state)

# PM2.5 Concentrations Model
pm25_numerical_columns, pm25_features, pm25_target = _select_features_and_target(
    pm25_merged_df, 'PM25_concentration')
(pm25_X_train, pm25_X_test, pm25_y_train, pm25_y_test,
 pm25_model, pm25_y_pred, mse) = _fit_and_report(
    pm25_features, pm25_target, 'PM2.5', test_size, random_state)

# PM10 Concentrations Model
# `mse` is reused for each pollutant, so after this cell it holds the PM10 value.
pm10_numerical_columns, pm10_features, pm10_target = _select_features_and_target(
    pm10_merged_df, 'PM10_concentration')
(pm10_X_train, pm10_X_test, pm10_y_train, pm10_y_test,
 pm10_model, pm10_y_pred, mse) = _fit_and_report(
    pm10_features, pm10_target, 'PM10', test_size, random_state)

NOx Concentrations Model Mean Squared Error: 128.26714379190926
PM2.5 Concentrations Model Mean Squared Error: 0.30276686242650525
PM10 Concentrations Model Mean Squared Error: 1.6071645008775757
