In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the raw CSV from the Kaggle input mount (the file name suggests hourly,
# per-city readings — confirm against the dataset description).
DATA_PATH = '/kaggle/input/air-quality-data-in-india/city_hour.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# First look at the frame: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Count the missing entries in each column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Forward-fill gaps: every missing value takes the most recent non-missing value
# above it in the same column. `DataFrame.ffill()` is the supported spelling;
# `fillna(method='ffill')` is deprecated and newer pandas releases warn about or
# reject it.
# NOTE(review): the fill follows row order across the whole frame. If the file
# stacks several cities one after another, values can carry over from one city
# into the next, and the AQI target column is imputed as well — confirm that
# this is intended.
df = df.ffill()

In [None]:
# Discard every row that still contains at least one missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# Columns fed to the model as predictors.
features = ['PM2.5', 'PM10', 'NO2', 'CO']

# Predictor matrix and regression target taken from the cleaned frame.
X = df.loc[:, features]
y = df.loc[:, 'AQI']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a 100-tree random forest on the training split (seeded so refits match).
# LinearRegression is the disabled alternative for this cell.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

In [None]:
# Predict AQI for the held-out test rows; the metrics and comparison plot below use these.
y_pred = model.predict(X_test)

In [None]:
# Error metrics on the held-out split: predictions vs. true AQI values.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report each metric on its own line under a common heading.
print("\nModel Performance:")
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("R^2 Score:", r2),
):
    print(label, value)

In [None]:
# For a regressor, `score` returns the coefficient of determination (R^2), not a
# classification-style accuracy: it can be negative when the model does worse than
# always predicting the mean, so it is reported here under its real name.
# The `accuracy` variable name is kept so any later cell that reads it still works.
accuracy = model.score(X_test, y_test) * 100
print(f"R^2 on the test set (share of AQI variance explained): {accuracy:.2f}%")

In [None]:
# Placeholder inputs for a 24-step "forecast": each feature is drawn uniformly at
# random from a fixed range, so these are synthetic values, not measurements or
# real forecasts. A dedicated seeded generator makes the draws (and every result
# derived from them) identical after a kernel restart; the original unseeded
# `np.random.uniform` calls produced different numbers on every run.
FORECAST_HOURS = 24
FORECAST_SEED = 42
rng = np.random.default_rng(FORECAST_SEED)

# (low, high) sampling bounds per feature, listed in the same column order as
# the `features` list used for training.
FEATURE_RANGES = {
    'PM2.5': (10, 150),
    'PM10': (20, 200),
    'NO2': (5, 80),
    'CO': (0.1, 2.5),
}

future_features = pd.DataFrame({
    name: rng.uniform(low, high, FORECAST_HOURS)
    for name, (low, high) in FEATURE_RANGES.items()
})

In [None]:
# Run the fitted model on the 24 generated feature rows: one predicted AQI value per row.
future_predictions = model.predict(future_features)

In [None]:
# Overlay the first 50 held-out AQI values with the model's predictions for the same rows.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.values[:50], label='Actual AQI', marker='o')
ax.plot(y_pred[:50], label='Predicted AQI', marker='x')
ax.set_xlabel('Sample Index')
ax.set_ylabel('AQI')
ax.set_title('Actual vs Predicted AQI')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Line chart of the model's output for the 24 generated feature rows, in row order.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(future_predictions, label='Predicted AQI (Next 24 Hours)', color='red')
ax.set_xlabel('Hour Index')
ax.set_ylabel('AQI')
ax.set_title('Predicted AQI for the Next 24 Hours')
ax.legend()
ax.grid()
plt.show()