In [2]:
# Import necessary libraries
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import sqlite3
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, LSTM
import geopandas as gpd
import folium

# Web scraping for AQI data
url = "https://waqi.info/"
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
aqi_data = []

for row in soup.find_all('tr'):
    aqi_row = []
    for cell in row.find_all('td'):
        aqi_row.append(cell.text)
    aqi_data.append(aqi_row)

aqi_df = pd.DataFrame(aqi_data, columns=["City", "AQI", "Timestamp"])

# Fetch real-time weather data using OpenWeatherMap API
API_KEY = "your_openweathermap_api_key"
city_list = aqi_df["City"].unique()
weather_data = []

for city in city_list:
    url = f"http://api.openweathermap.org/data/2.5/weather?q={'Zurich'}&appid={'a5e7c51adb87d5ab7af47ae9f61a4ae8'}"
    response = requests.get(url).json()
    weather_data.append(response)

weather_df = pd.DataFrame(weather_data)

# Data cleaning and preparation
cleaned_aqi_df = aqi_df.dropna().drop_duplicates()
cleaned_weather_df = weather_df.dropna().drop_duplicates()
merged_df = pd.merge(cleaned_aqi_df, cleaned_weather_df, on="City")
merged_df["Season"] = merged_df["Timestamp"].apply(lambda x): get_season(
    month = pd.to_datetime(timestamp).month
    if month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    elif month in [9, 10, 11]:
        return 'Autumn'
    else:
        return 'Winter'
)  # Function to determine the season

# Storing the data in a database
conn = sqlite3.connect("aqi_weather.db")
merged_df.to_sql("data", conn, if_exists="replace", index=False)

# Fetch data from the database for analysis
data_df = pd.read_sql_query("SELECT * FROM data", conn)

# EDA

# Apply the necessary EDA steps here

# Feature engineering and model selection
X = data_df.drop(["AQI", "City", "Timestamp"], axis=1)
y = data_df["AQI"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Implementing ML models and evaluating them
# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)

print("Linear Regression Evaluation:")
print("Mean Squared Error:", mean_squared_error(y_test, lr_preds))
print("Mean Absolute Error:", mean_absolute_error(y_test, lr_preds))
print("R2 Score:", r2_score(y_test, lr_preds))

# Random Forest
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

print("\nRandom Forest Evaluation:")
print("Mean Squared Error:", mean_squared_error(y_test, rf_preds))
print("Mean Absolute Error:", mean_absolute_error(y_test, rf_preds))
print("R2 Score:", r2_score(y_test, rf_preds))

# Neural Networks
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

nn_model = Sequential()
nn_model.add(Dense(50, input_dim=X_train_scaled.shape[1], activation='relu'))
nn_model.add(Dense(25, activation='relu'))
nn_model.add(Dense(1, activation='linear'))
nn_model.compile(loss='mean_squared_error', optimizer='adam')
nn_model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)
nn_preds = nn_model.predict(X_test_scaled)

print("\nNeural Network Evaluation:")
print("Mean Squared Error:", mean_squared_error(y_test, nn_preds))
print("Mean Absolute Error:", mean_absolute_error(y_test, nn_preds))
print("R2 Score:", r2_score(y_test, nn_preds))

# Visualization of AQI levels on maps using geopandas and folium
gdf = gpd.GeoDataFrame(
    data_df, geometry=gpd.points_from_xy(data_df.lon, data_df.lat))

m = folium.Map(location=[20, 0], zoom_start=2)
for idx, row in gdf.iterrows():
    folium.CircleMarker(location=[row['lat'], row['lon']],
                        radius=5,
                        color='red' if row['AQI'] > 100 else 'green',
                        fill=True,
                        fill_color='red' if row['AQI'] > 100 else 'green',
                        fill_opacity=0.7,
                        tooltip=row['City']).add_to(m)

m.save("aqi_map.html")

# Making the Python code/Jupyter notebooks available in a public GitHub repository
# Create a new repository on GitHub and push the code and Jupyter notebooks to it.

SyntaxError: invalid syntax (1309757624.py, line 49)