In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import bokeh
import altair as alt
import ggplot as gg
import folium


In [2]:
# Load the dataset
file_path = "/kaggle/input/countries-poluation/air_pollution new.csv"
air_quality_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
air_quality_data.head()


Unnamed: 0,city,country,2017,2018,2019,2020,2021,2022,2023
0,Kabul,Afghanistan,0,61.8,58.8,46.5,37.5,17.1,18.1
1,Tirana,Albania,0,0.0,0.0,16.0,12.5,14.5,14.4
2,Algiers,Algeria,0,0.0,21.2,20.2,20.0,17.8,17.4
3,Ordino,Andorra,0,0.0,0.0,7.4,7.3,5.4,5.3
4,Luanda,Angola,0,0.0,15.9,13.0,11.0,8.8,8.7


In [3]:
# Count unique cities and countries
num_unique_cities = air_quality_data['city'].nunique()
num_unique_countries = air_quality_data['country'].nunique()

print(f"Number of unique cities: {num_unique_cities}")
print(f"Number of unique countries: {num_unique_countries}")

Number of unique cities: 6852
Number of unique countries: 133


In [4]:
# Summary statistics for the year 2020
summary_2020 = air_quality_data['2020'].describe()

print("Summary Statistics for the year 2020:")
print(summary_2020)


Summary Statistics for the year 2020:
count     6985
unique     534
top          0
freq      3125
Name: 2020, dtype: object


**How does the distribution of Air Quality Index (AQI) values look for the year 2021?**

In [5]:
import plotly.express as px
import pandas as pd

# Assuming air_quality_data is a DataFrame with a column named '2021' containing AQI values

# Create a DataFrame with a column 'AQI' for the year 2021
data_2021 = pd.DataFrame({'AQI': air_quality_data['2021']})

# Create a histogram using Plotly Express
fig = px.histogram(data_2021, x='AQI', nbins=20, title='Distribution of Air Quality Index (AQI) in 2021',
                   labels={'AQI': 'AQI', 'count': 'Frequency'})

# Display the plot
fig.show()

**How does the distribution of AQI values vary across different countries for the year 2021?**

In [6]:
# Create an interactive histogram for AQI distribution in 2021 using Plotly
fig = px.histogram(air_quality_data, x='2021', color='country',
                   title='Distribution of Air Quality Index (AQI) Across Countries in 2021',
                   labels={'2021': 'AQI'}, height=1000, width=1000)

# Show the interactive chart
fig.show()


**How does the AQI vary across different countries in the year 2022? Let's use an interactive choropleth map.**

In [7]:
# Assuming air_quality_data is your original DataFrame
air_quality_2022 = air_quality_data[['country', '2022']].dropna()

# Create an interactive choropleth map for AQI in 2022
fig = px.choropleth(air_quality_2022, locations='country', locationmode='country names',
                    color='2022', title='Air Quality Index (AQI) Across Countries in 2022',
                    labels={'2022': 'AQI'})

# Show the interactive choropleth map
fig.show()


**How does the distribution of AQI values vary across different months in the year 2021?**

In [8]:
# Extract month information from the index (assuming the index is datetime)
air_quality_data.index = pd.to_datetime(air_quality_data.index)
air_quality_data['Month'] = air_quality_data.index.month_name()

# Create an interactive box plot for AQI distribution across months in 2021
fig = px.box(air_quality_data[air_quality_data.columns[4:]], x='Month', y=air_quality_data.columns[4:],
             title='Distribution of AQI Across Months in 2021',
             labels={'Month': 'Month', 'value': 'AQI'})

# Show the interactive box plot
fig.show()


**Let's create a sunburst chart to visualize the distribution of AQI in different regions.**

In [9]:
unique_values_2022 = air_quality_data['2022'].unique()
# Convert '2022' column to numeric, coercing errors to NaN
air_quality_data['2022'] = pd.to_numeric(air_quality_data['2022'], errors='coerce')

# Create an interactive sunburst chart for regional distribution of AQI
fig = px.sunburst(air_quality_data, path=['country', 'city'], values='2022',
                  title='Regional Distribution of AQI in 2022',
                  labels={'2022': 'AQI'}, height=1000, width=1000)

# Show the interactive sunburst chart
fig.show()


**Can we create a bar chart to compare the average AQI values between different countries for the year 2021?**

In [10]:
# Filter data for the year 2021
air_quality_2021 = air_quality_data[['country', '2021']].dropna()

# Create an interactive bar chart for average AQI values between different countries in 2021
fig = px.bar(air_quality_2021, x='country', y='2021',
             title='Comparison of Average AQI Between Different Countries in 2021',
             labels={'country': 'Country', '2021': 'Average AQI'}, height=1000)

# Show the interactive bar chart
fig.show()


**Can we create a radar chart to compare the AQI values for different pollutants in a specific city?**

In [11]:
# Choose a specific city for analysis
selected_city = 'YourCityName'

# Filter data for the selected city
city_data = air_quality_data[air_quality_data['city'] == selected_city]

# Create an interactive radar chart for AQI values of different pollutants
fig = px.line_polar(city_data, r=city_data.columns[2:], theta=city_data.columns[2:],
                    title=f'AQI Values of Different Pollutants in {selected_city}',
                    labels={'theta': 'Pollutant', 'r': 'AQI'})

# Show the interactive radar chart
fig.show()
