Predicting counties with the highest Max AQI in 2027:

In [48]:
import pandas as pd

dataset = pd.read_csv('./Datasets/all_years_aqi_by_county.csv')
dataset["CountyState"] = dataset["County"] + ", " + dataset["State"]

# Inputs to predict a given value
features = ["CountyState", "Year"]

# Value to be predicted
predictor = "Max AQI"

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error

# Split the dataset into features and predictors
features_df = pd.DataFrame(dataset[features])
predictor_df = pd.DataFrame(dataset[predictor])

# Encode County, State labels
le = LabelEncoder()
features_df["CountyState"] = le.fit_transform(features_df["CountyState"])

# Make the train test split
train_ft, test_ft, train_pr, test_pr = train_test_split(features_df, predictor_df, test_size=.2)

# Fit a basic Linear Regression and check RMSE
regression = LinearRegression().fit(train_ft, train_pr)
print(root_mean_squared_error(test_pr, regression.predict(test_ft)))

327.23680042779955


In [49]:
all_counties = features_df['CountyState'].unique()
predict_data_df = pd.DataFrame()
predict_data_df['CountyState'] = all_counties
predict_data_df['Year'] = 2027

# Predicted value (Max AQI)
predict_data_df['Prediction'] = regression.predict(predict_data_df)
predict_data_df['CountyState'] = le.inverse_transform(predict_data_df['CountyState'])

In [50]:
# print(predict_data_df)\
print("----------------------------------------")
print("The 5 counties with the highest predicted Max AQI in 2027")
print(predict_data_df.sort_values(by=['Prediction'], ascending=False).head(5))

print("----------------------------------------")
print("The 5 counties with the lowest predicted Max AQI in 2027")
print(predict_data_df.sort_values(by=['Prediction']).head(5))

print("----------------------------------------")
print("Counties sorted by predicted Max AQI")
print(predict_data_df.sort_values(by=['Prediction']))
print("----------------------------------------")


----------------------------------------
The 5 counties with the highest predicted Max AQI in 2027
                    CountyState  Year  Prediction
1021  Abbeville, South Carolina  2027  120.553697
99                   Ada, Idaho  2027  120.552645
1395            Adair, Oklahoma  2027  120.551594
60              Adams, Colorado  2027  120.550542
100             Adams, Illinois  2027  120.549491
----------------------------------------
The 5 counties with the lowest predicted Max AQI in 2027
                 CountyState  Year  Prediction
17             Yuma, Arizona  2027  118.934321
1465  Yukon-Koyukuk , Alaska  2027  118.935373
752         Yuba, California  2027  118.936424
886           York, Virginia  2027  118.937476
376     York, South Carolina  2027  118.938527
----------------------------------------
Counties sorted by predicted Max AQI
                    CountyState  Year  Prediction
17                Yuma, Arizona  2027  118.934321
1465     Yukon-Koyukuk , Alaska  2027  118.