In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the air-quality measurements; the path is resolved relative to the
# kernel's working directory.
AIR_QUALITY_CSV = "AirQuality.csv"
data = pd.read_csv(AIR_QUALITY_CSV)

# Bare last expression: the notebook renders the frame as the cell output.
data

In [None]:
# Count the missing values in each column of the loaded frame.
data.isna().sum()

In [None]:
# Build the cleaned frame without mutating `data`: dropping rows in place would
# silently change what the earlier preview / null-count cells show when they
# are re-run, and made the second dropna() a redundant no-op.
# The explicit .copy() makes `cleaned_data` an independent frame, so later
# column assignments on it cannot raise SettingWithCopyWarning.
cleaned_data = data.dropna().copy()

In [None]:
# Display the frame that remains after rows with missing values were dropped.
cleaned_data

In [None]:
# Sanity check: per-column missing-value counts for the cleaned frame
# (expected to be zero everywhere, since it comes from dropna()).
cleaned_data.isna().sum()

In [None]:
# Print a summary of `data`: column names, non-null counts, dtypes and memory use.
data.info()

In [None]:
# Parse the 'Date' column into pandas datetimes so the time-series plots get a
# proper time axis. assign() swaps the column in a new frame, which is then
# bound back to the same name.
parsed_dates = pd.to_datetime(cleaned_data['Date'])
cleaned_data = cleaned_data.assign(Date=parsed_dates)

In [None]:
# Put the rows in chronological order. Optional, but recommended for time
# series: a line plot connects points in row order.
cleaned_data = cleaned_data.sort_values('Date')

In [None]:
# Line plot of the 'AQI' column against the 'Date' column.
plt.figure(figsize=(12, 6))
plt.plot(cleaned_data['Date'], cleaned_data['AQI'], label='AQI')
plt.xlabel('Date')  # the x-axis was previously left unlabeled
plt.ylabel('AQI')
plt.title('AQI Trend Over Time (Cleaned Data)')
plt.legend()  # without this call the label= passed to plot() is never drawn
plt.grid(True)
plt.show()

In [None]:
# Pollutant columns to draw
pollutants = ['PM2.5', 'PM10', 'CO']

# Overlay one line per pollutant on a single shared axes
plt.figure(figsize=(12, 8))

for pollutant in pollutants:
    plt.plot(cleaned_data['Date'], cleaned_data[pollutant], label=pollutant)

plt.xlabel('Date')
plt.ylabel('Concentration')
plt.title('Pollutant Trends Over Time (Cleaned Data)')
# The legend is what maps each line colour to its pollutant; without it the
# label= values above are never shown and the three series are indistinguishable.
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Average the AQI readings that share the same date, then turn the grouped
# result back into a two-column frame ('Date', 'AQI').
aqi_by_date = (
    cleaned_data
    .groupby('Date')['AQI']
    .mean()
    .reset_index()
)

# Bar chart of the per-date averages, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(aqi_by_date['Date'], aqi_by_date['AQI'], color='blue', alpha=0.7)
ax.set_xlabel('Date')
ax.set_ylabel('Average AQI')
ax.set_title('Average AQI Across Dates')
ax.grid(True)
plt.show()


In [None]:
# One violin figure per pollutant column. The three figures differ only in the
# column plotted on the y-axis, so they are generated from a single loop
# instead of three copy-pasted stanzas.
# NOTE(review): with x='AQI', seaborn presumably draws one violin per distinct
# AQI value -- confirm that is the intended grouping (binned AQI categories may
# read better).
for pollutant in ('PM2.5', 'PM10', 'CO'):
    plt.figure(figsize=(12, 6))
    # color='red' was dropped: it had no effect because `palette` takes
    # precedence over `color` in seaborn's categorical plots.
    sns.violinplot(data=cleaned_data, x='AQI', y=pollutant, palette='Set2')
    plt.xlabel('AQI')
    plt.ylabel(pollutant)
    plt.title(f'Violin Plot: Distribution of AQI for {pollutant}')
    plt.tight_layout()
    plt.show()

In [None]:
# Columns feeding the bubble chart: position (x, y), marker size, and colour.
x = cleaned_data['PM2.5']
y = cleaned_data['CO']
bubble_size = cleaned_data['PM10']  # marker area is taken from 'PM10'
aqi_values = cleaned_data['AQI']    # marker colour is taken from 'AQI'

# Draw the scatter through the explicit Figure/Axes API.
fig, ax = plt.subplots(figsize=(12, 8))
bubbles = ax.scatter(x, y, s=bubble_size, c=aqi_values, cmap='coolwarm', alpha=0.7)
ax.set_xlabel('PM2.5')
ax.set_ylabel('CO')
ax.set_title('Bubble Chart: AQI vs. PM2.5 and CO')
fig.colorbar(bubbles, ax=ax, label='AQI')  # colour scale for the AQI values
ax.grid(True)
fig.tight_layout()
plt.show()