# Libraries

In [None]:
import pandas as pd                       # For data manipulation and analysis
import numpy as np                        # For numerical operations
import matplotlib.pyplot as plt           # For creating static, animated, and interactive visualizations
import matplotlib.ticker as ticker
import seaborn as sns                     # For statistical data visualization
import warnings
warnings.filterwarnings('ignore')

# Load the Datasets

In [None]:
# Read the deaths and vaccinations CSV files from the Kaggle input directory
# into two pandas DataFrames.
# NOTE(review): the deaths file name contains a space while the vaccinations
# file name uses an underscore — presumably both match the dataset's real file
# names; verify against the input directory.
death = pd.read_csv("/kaggle/input/covid-19-deaths-and-vaccinations-dataset/COVID DEATHS.csv")
vaccine = pd.read_csv("/kaggle/input/covid-19-deaths-and-vaccinations-dataset/COVID_VACCINATIONS.csv")

In [None]:
# Report the (rows, columns) shape of each raw DataFrame
for raw_frame in (death, vaccine):
    print(raw_frame.shape)

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line after each summary.
death.info()
vaccine.info()

In [None]:
# Display every row that exactly repeats an earlier row, for each raw DataFrame
for raw_frame in (death, vaccine):
    repeated_rows = raw_frame.duplicated()
    print(raw_frame[repeated_rows])

In [None]:
# Outer-join the deaths and vaccinations DataFrames.
# No `on=` argument is given, so pandas joins on every column name the two
# frames have in common; how='outer' keeps rows found in only one of them.
# NOTE(review): the shared columns are not visible from this cell — confirm
# that they are exactly the intended join keys.
combined = pd.merge(death, vaccine, how='outer')

In [None]:
combined.shape

## Data Cleaning

In [None]:
# Flag the rows that have no continent recorded and preview them before removal
mask = combined['continent'].isna()

combined.loc[mask]

In [None]:
# Keep only the rows that do have a continent, then check the resulting shape
combined = combined.loc[~mask]

combined.shape

In [None]:
# Order the rows by the 'date' column.
# NOTE(review): 'date' is only converted with pd.to_datetime in a later cell,
# so it presumably still holds strings here and is sorted lexicographically —
# that equals chronological order only for ISO-style (YYYY-MM-DD) values; confirm.
combined = combined.sort_values(by='date')

combined

In [None]:
combined.info()

In [None]:
# Columns that the rest of the notebook does not use; they are removed in one go
unused_columns = [
    'total_cases', 'total_deaths', 'total_tests', 'total_vaccinations',
    'people_fully_vaccinated', 'total_boosters', 'people_vaccinated', 'iso_code',
    'new_deaths_smoothed', 'new_cases_smoothed',
    'total_cases_per_million', 'new_cases_per_million', 'new_cases_smoothed_per_million',
    'total_deaths_per_million', 'new_deaths_per_million', 'new_deaths_smoothed_per_million',
    'hosp_patients_per_million', 'weekly_icu_admissions', 'weekly_icu_admissions_per_million',
    'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million', 'icu_patients_per_million',
    'reproduction_rate', 'icu_patients', 'hosp_patients',
    'positive_rate', 'tests_units', 'tests_per_case',
    'total_tests_per_thousand', 'new_tests_per_thousand',
    'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
    'new_vaccinations_smoothed', 'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred',
    'total_boosters_per_hundred', 'new_vaccinations_smoothed_per_million',
    'new_people_vaccinated_smoothed', 'new_people_vaccinated_smoothed_per_hundred',
    'stringency_index', 'median_age', 'aged_65_older', 'aged_70_older',
    'gdp_per_capita', 'extreme_poverty', 'cardiovasc_death_rate', 'diabetes_prevalence',
    'female_smokers', 'male_smokers', 'handwashing_facilities',
    'hospital_beds_per_thousand', 'life_expectancy', 'human_development_index',
    'excess_mortality_cumulative_absolute', 'excess_mortality_cumulative',
    'excess_mortality', 'excess_mortality_cumulative_per_million',
    'population_density',
]
combined = combined.drop(columns=unused_columns)


## Data Transformation

In [None]:
# Parse the date column once, then derive the calendar year and the
# abbreviated month name from the parsed values.
# NOTE(review): '%b' output depends on the active locale; presumably an
# English locale is in effect so the abbreviations read 'Jan', 'Feb', ... — confirm.
parsed_dates = pd.to_datetime(combined['date'])
combined['date'] = parsed_dates
combined['year'] = parsed_dates.dt.year
combined['month'] = parsed_dates.dt.strftime('%b')

In [None]:
# Replace every missing value, in every column, with 0
combined = combined.fillna(value=0)

In [None]:
# Shorter names for the four "new_*" count columns
column_aliases = {
    'new_cases': 'cases',
    'new_deaths': 'deaths',
    'new_tests': 'tests',
    'new_vaccinations': 'vaccinations',
}
combined = combined.rename(columns=column_aliases)

In [None]:
# Continue on an independent (deep) copy so `combined` itself stays untouched,
# then print a summary of the copy
test = combined.copy(deep=True)
test.info()

In [None]:
test

In [None]:
# Allowed values, in their intended order, for the categorical columns
continent = [
    'Asia', 'Europe', 'Africa',
    'Oceania', 'North America', 'South America',
]

year = [2020, 2021, 2022, 2023]

month = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

In [None]:
# Turn month, continent and year into ordered categoricals, in that order.
# Values that are not in the supplied category list become NaN.
for column, ordering in (('month', month), ('continent', continent), ('year', year)):
    test[column] = pd.Categorical(test[column], categories=ordering, ordered=True)

In [None]:
# The raw date is no longer needed once year and month exist as columns
test = test.drop(columns='date')

# Extracting Key Performance Indicators

## What are the total cases, deaths, tests, vaccinations based on the year 

### Defining Function to Calculate percentage change

In [None]:
def calculate_percent_change(current_value, baseline_value):
    """Return how far ``current_value`` lies from ``baseline_value``, in percent.

    Args:
        current_value: The value being compared.
        baseline_value: The reference value the change is measured against.

    Returns:
        ``(current_value - baseline_value) / baseline_value * 100``, or the
        fixed value 100 when ``baseline_value`` is 0, which avoids a division
        by zero.
    """
    if baseline_value != 0:
        return (current_value - baseline_value) / baseline_value * 100
    return 100


In [None]:
# Show floats with two decimals instead of scientific notation
pd.set_option('display.float_format', '{:.2f}'.format)

# Yearly totals: add up cases, deaths, tests and vaccinations within each year
kpi_columns = ['cases', 'deaths', 'tests', 'vaccinations']
year_cases = test.groupby('year', observed=True)[kpi_columns].sum().reset_index()

In [None]:
# Baselines are read from the row labelled 0 of the yearly totals
# (presumably 2020, the first of the ordered year categories — confirm)
baseline_cases = year_cases.at[0, 'cases']
baseline_deaths = year_cases.at[0, 'deaths']
baseline_tests = year_cases.at[0, 'tests']

In [None]:
# Percentage change of every yearly total relative to its baseline value
year_cases['percent_change_in_cases'] = [
    calculate_percent_change(total, baseline_cases) for total in year_cases['cases']
]
year_cases['percent_change_in_deaths'] = [
    calculate_percent_change(total, baseline_deaths) for total in year_cases['deaths']
]
year_cases['percent_change_in_tests'] = [
    calculate_percent_change(total, baseline_tests) for total in year_cases['tests']
]

In [None]:
# A total equal to its baseline has a 0% change (always true for the baseline
# row itself); show that as 100 in the percent-change columns.
# Only those columns are touched: replacing 0 across the whole frame would also
# rewrite any genuine zero total in cases/deaths/tests/vaccinations.
percent_columns = ['percent_change_in_cases', 'percent_change_in_deaths', 'percent_change_in_tests']
year_cases[percent_columns] = year_cases[percent_columns].replace(0.00, 100.00)

In [None]:
year_cases

### Visualizing the Total Tests, Cases, Vaccinations, Deaths

In [None]:
# Draw the four yearly totals on a 2 x 2 grid, one metric per panel
fig, axs = plt.subplots(2, 2, figsize=(20, 10))

# (column, legend label, panel title, extra line styling), in row-major panel order
total_panels = [
    ('cases', 'Total Cases', 'Total Cases Over Years', {}),
    ('deaths', 'Total Deaths', 'Total Deaths Over Years', {'color': 'red'}),
    ('tests', 'Total Tests', 'Total Tests Over Years', {'color': 'green'}),
    ('vaccinations', 'Total Vaccinations', 'Total Vaccinations Over Years', {'color': 'orange'}),
]

for panel, (column, legend_label, panel_title, line_style) in zip(axs.flat, total_panels):
    panel.plot(year_cases['year'], year_cases[column], marker='o', label=legend_label, **line_style)
    panel.set_title(panel_title)
    panel.set_xlabel('Year')
    panel.set_ylabel('Count')
    panel.legend()

# Tidy the spacing between panels and render the figure
plt.tight_layout()
plt.show()


### Visualizing the Percentage Change in Tests, Cases, Deaths

In [None]:
# Three side-by-side panels: percentage change in cases, deaths and tests
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

# (column, legend label, panel title, line colour), left to right
change_panels = [
    ('percent_change_in_cases', 'Percent Change in Cases', 'Percentage Change in Cases Over Years', 'blue'),
    ('percent_change_in_deaths', 'Percent Change in Deaths', 'Percentage Change in Deaths Over Years', 'orange'),
    ('percent_change_in_tests', 'Percent Change in Tests', 'Percentage Change in Tests Over Years', 'green'),
]

for panel, (column, legend_label, panel_title, line_color) in zip(axes, change_panels):
    panel.plot(year_cases['year'], year_cases[column], marker='o', label=legend_label, color=line_color)
    panel.set_title(panel_title)
    panel.set_xlabel('Year')
    panel.set_ylabel('Percentage Change (%)')
    panel.grid(True)
    panel.legend()

# Tidy the spacing between panels and render the figure
plt.tight_layout()
plt.show()


## Percentage Change in Cases and Deaths Over Months

In [None]:
# Largest 'cases' and 'deaths' value within each (year, month) group
monthly_peaks = test.groupby(['year', 'month'], observed=True)[['cases', 'deaths']].max()
changes = monthly_peaks.reset_index()

# Row-over-row percentage change of those values
changes['percent_change_in_cases'] = changes['cases'].pct_change().mul(100)
changes['percent_change_in_deaths'] = changes['deaths'].pct_change().mul(100)

In [None]:
changes

### Checking for Outliers

### Defining Function For Formatting plot labels

In [None]:
def format_tick_label(x, pos):
    """Format an axis tick value with a compact 'k' / 'M' suffix.

    Args:
        x: The numeric tick value.
        pos: The tick position supplied by matplotlib's FuncFormatter; unused.

    Returns:
        A string such as '500', '2.5k' or '-3M'. Scaled values keep one
        decimal when they are not whole, so neighbouring ticks like 1.5M and
        2.5M no longer collapse into the same rounded label.
    """
    # Pick the scale from the magnitude so negative ticks are abbreviated too
    magnitude = abs(x)
    if magnitude >= 1_000_000:
        scaled, suffix = x / 1_000_000, 'M'
    elif magnitude >= 1_000:
        scaled, suffix = x / 1_000, 'k'
    else:
        return f'{x:.0f}'

    text = f'{scaled:.1f}'
    # Whole numbers are shown without the trailing '.0' ('2M', not '2.0M')
    if text.endswith('.0'):
        text = text[:-2]
    return f'{text}{suffix}'

In [None]:
# Side-by-side boxplots of the monthly 'cases' and 'deaths' values
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (column, panel title, x-axis label), left to right.
# Both panels get an explicit x label; previously only the deaths panel did.
outlier_panels = [
    ('cases', "Checking Outliers for Cases ", 'Cases'),
    ('deaths', "Checking Outliers for Deaths ", 'Deaths'),
]

for panel, (column, panel_title, axis_label) in zip(axes, outlier_panels):
    sns.boxplot(x=changes[column], ax=panel)
    panel.set_title(panel_title)
    panel.set_xlabel(axis_label)
    # Abbreviate large tick values using the notebook's tick formatter
    panel.xaxis.set_major_formatter(ticker.FuncFormatter(format_tick_label))

plt.show()

### Calculating Interquartile Range

In [None]:
# Drop outliers from 'cases', then from 'deaths', using 1.5 * IQR fences.
# The columns are handled one after the other, so the 'deaths' quartiles are
# taken from the rows that already survived the 'cases' filter.
for column in ('cases', 'deaths'):
    first_quartile, third_quartile = changes[column].quantile([0.25, 0.75])
    fence_width = 1.5 * (third_quartile - first_quartile)

    # Inclusive on both ends, so values lying exactly on a fence are kept
    inside_fences = changes[column].between(first_quartile - fence_width, third_quartile + fence_width)
    changes = changes[inside_fences]

# 'changes' now holds only the rows that passed both the 'cases' and the 'deaths' filter

#### Metrics Table of Average Percentage Changes in Cases and Deaths by Year

In [None]:
# Average percentage change in cases and deaths for each year.
# The year labels come from the groupby result itself: after outlier removal a
# year may have no rows left, and a hard-coded four-year list would then no
# longer match the number of group means (DataFrame construction would raise).
yearly_means = changes.groupby('year', observed=True)[
    ['percent_change_in_cases', 'percent_change_in_deaths']
].mean()

metrics = pd.DataFrame(
    {
        # Plain integers (not categories) so the year behaves as a numeric axis
        'year': yearly_means.index.astype(int),
        'cases': yearly_means['percent_change_in_cases'].values,
        'death': yearly_means['percent_change_in_deaths'].values,
    }
)

metrics

In [None]:
# Overlay the yearly average percentage change of cases and of deaths.
# Each line carries a label so the legend tells the two series apart.
plt.figure(figsize=(20,5))
sns.lineplot(data=metrics, x='year', y='cases', label='Cases')
sns.lineplot(data=metrics, x='year', y='death', label='Deaths')
plt.title("Comparision of Trend Lines for Cases and Deaths")
plt.ylabel('Percentage of Cases and Deaths')
plt.xlabel('Years')
plt.legend()
plt.show()

### Visualizing the Trend of `Cases` throughout months of different Years 

In [None]:
# Monthly 'cases' values, one panel per year on a 2 x 2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Dedicated loop names keep the module-level `year` and `col` variables intact
for index, plot_year in enumerate([2020, 2021, 2022, 2023]):
    panel = axes[index // 2, index % 2]

    # Rows belonging to the current year
    year_data = changes[changes['year'] == plot_year]

    # Bars, a connecting line and markers for cases per month
    sns.barplot(data=year_data, x='month', y='cases', hue='month', ax=panel)
    sns.lineplot(data=year_data, x='month', y='cases', ax=panel)
    sns.scatterplot(data=year_data, x='month', y='cases', ax=panel)

    panel.set_title(str(plot_year))

    # hue repeats the x variable, so a legend adds nothing. Depending on the
    # seaborn version no legend is created at all and get_legend() is None.
    legend = panel.get_legend()
    if legend is not None:
        legend.remove()

# Label the y-axis on both left-hand panels and blank it on the right-hand ones
for left_panel in axes[:, 0]:
    left_panel.set_ylabel('Cases')
for right_panel in axes[:, 1]:
    right_panel.set_ylabel('')

plt.tight_layout()
plt.show()


### Visualizing the Trend of `Deaths` throughout months of different Years 

In [None]:
# Monthly 'deaths' values, one panel per year on a 2 x 2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Dedicated loop names keep the module-level `year` and `col` variables intact
for index, plot_year in enumerate([2020, 2021, 2022, 2023]):
    panel = axes[index // 2, index % 2]

    # Rows belonging to the current year
    year_data = changes[changes['year'] == plot_year]

    # Bars, a connecting line and markers for deaths per month
    sns.barplot(data=year_data, x='month', y='deaths', hue='month', ax=panel)
    sns.lineplot(data=year_data, x='month', y='deaths', ax=panel)
    sns.scatterplot(data=year_data, x='month', y='deaths', ax=panel)

    panel.set_title(str(plot_year))

    # hue repeats the x variable, so a legend adds nothing. Depending on the
    # seaborn version no legend is created at all and get_legend() is None.
    legend = panel.get_legend()
    if legend is not None:
        legend.remove()

# Label the y-axis on both left-hand panels and blank it on the right-hand ones
for left_panel in axes[:, 0]:
    left_panel.set_ylabel('Deaths')
for right_panel in axes[:, 1]:
    right_panel.set_ylabel('')

plt.tight_layout()
plt.show()


### What Percent of World Population got Affected, Tested, Vaccinated and died

In [None]:
# Per (year, continent, location) maximum of population, tests, cases and deaths
affected_columns = ['population', 'tests', 'cases', 'deaths']
affected = test.groupby(['year', 'continent', 'location'], observed=True)[affected_columns].max().reset_index()

In [None]:
affected

### Calculating the percentage of the population tested, of tests that turned up cases, and of cases that ended in death

In [None]:
# Ratios in percent: tests per population, cases per test, deaths per case.
# A zero denominator produces inf or NaN here.
affected['percent_of_population_tested'] = affected['tests'].div(affected['population']).mul(100)
affected['percent_of_cases_from_tests'] = affected['cases'].div(affected['tests']).mul(100)
affected['percent_of_deaths_from_cases'] = affected['deaths'].div(affected['cases']).mul(100)

In [None]:
affected

### Replacing Infinite Values with `NaN`

In [None]:
# Turn +inf / -inf (left by division by zero) into NaN
affected = affected.replace([np.inf, -np.inf], np.nan)

### Metrics Table of Average Percentage of Tests, Cases and Deaths by Year

In [None]:
# Yearly mean of the three percentage columns.
# The year labels are taken from the groupby result rather than a hard-coded
# list, so the table stays consistent when a year has no rows in `affected`
# (a fixed four-year list would then not match the number of group means).
yearly_ratio_means = affected.groupby('year', observed=True)[
    ['percent_of_population_tested', 'percent_of_cases_from_tests', 'percent_of_deaths_from_cases']
].mean()

metrics_1 = pd.DataFrame({
    # Plain integers (not categories) so the year behaves as a numeric axis
    'year': yearly_ratio_means.index.astype(int),
    'tests': yearly_ratio_means['percent_of_population_tested'].values,
    'cases': yearly_ratio_means['percent_of_cases_from_tests'].values,
    'deaths': yearly_ratio_means['percent_of_deaths_from_cases'].values,
})


metrics_1

In [None]:
# Show missing yearly averages as 0
metrics_1 = metrics_1.fillna(0.00)

In [None]:
metrics_1

### Visualizing the Trendlines of tests, cases and deaths

In [None]:
# One trend line per metric (tests, cases, deaths), left to right
fig, axes = plt.subplots(1, 3, figsize=(20, 5))

for panel, measure in zip(axes, ('tests', 'cases', 'deaths')):
    sns.lineplot(x=metrics_1['year'], y=metrics_1[measure], ax=panel)

plt.tight_layout()

plt.show()

## Continents and Countries with Cases and Deaths

In [None]:
# Per (continent, location, year, month) maximum of each measure
monthly_keys = ['continent', 'location', 'year', 'month']
monthly_measures = ['population', 'cases', 'deaths', 'tests', 'vaccinations']
countries = test.groupby(monthly_keys, observed=True)[monthly_measures].max().reset_index()

In [None]:
countries

### Aggregating COVID-19 Data by Year and Continent

In [None]:
# Distinct year values found in the data
year = list(test['year'].unique())

# Continent-level summary table for each year, keyed by year
continents = {}
country_totals = {'population': 'max', 'tests': 'sum', 'cases': 'sum', 'deaths': 'sum'}

for current_year in year:
    rows_for_year = countries[countries['year'] == current_year]

    # Collapse the monthly rows to one row per country:
    # population by maximum, the counts by sum ...
    per_country = (
        rows_for_year
        .groupby(['continent', 'location'], observed=True)
        .agg(country_totals)
        .reset_index()
    )

    # ... then add the countries up within each continent
    data = (
        per_country
        .groupby(['continent'], observed=True)[['population', 'tests', 'cases', 'deaths']]
        .sum()
        .reset_index()
    )

    # Ratios in percent: tests per population, cases per test, deaths per case
    data['pop_tested'] = data['tests'].div(data['population']).mul(100)
    data['pop_affected'] = data['cases'].div(data['tests']).mul(100)
    data['pop_dead'] = data['deaths'].div(data['cases']).mul(100)

    # Division by zero leaves +/-inf behind; store those as NaN
    data = data.replace([np.inf, -np.inf], np.nan)
    continents[current_year] = data

In [None]:
continents

### Plotting Population Tested Percentage by Continent Over Years

In [None]:
# Create a figure and axes with 2 rows and 2 columns
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

year=list(test['year'].unique())


# Loop through the years and continents to plot data
for i, year in enumerate(year):
    row = i // 2
    col = i % 2
    
    # Plotting bar, line, and scatter plots for population tested percentage by continent
    sns.barplot(x=continents[year]['continent'], y=continents[year]['pop_tested'], hue=continents[year]['continent'], ax=axes[row, col])
    sns.lineplot(x=continents[year]['continent'], y=continents[year]['pop_tested'], ax=axes[row, col])
    sns.scatterplot(x=continents[year]['continent'], y=continents[year]['pop_tested'], ax=axes[row, col])

    # Set title for each subplot based on the year
    axes[row, col].set_title(str(year))
    axes[row, col].set_xlabel('Continent')
    axes[row, col].set_ylabel('Population Tested (%)')

# Get handles and labels from the last subplot (axes[1, 1]) to create a unified legend
handles, labels = axes[1, 1].get_legend_handles_labels()

# Remove legends from all subplots
for ax in axes.flatten():
    ax.get_legend().remove()

# Create a unified legend for the entire figure
fig.legend(handles, labels, loc='upper right', title='Continent', bbox_to_anchor=(1.2, 1), facecolor='lightgrey', fontsize='medium', title_fontsize='large')

# Adjust layout and display the plots
plt.tight_layout()
plt.show()


### Population Affected Percentage by Continent Over Years

In [None]:

# Cases as a percentage of tests, per continent, one panel per year
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Dedicated loop name so the module-level `year` list is left alone
for panel, plot_year in zip(axes.flat, continents):
    data = continents[plot_year]

    # Plot missing percentages as 0. Assigning the result back avoids calling
    # fillna(inplace=True) on a column selection, which is not guaranteed to
    # modify the underlying DataFrame.
    data['pop_affected'] = data['pop_affected'].fillna(0)

    # Bars, a connecting line and markers for the affected percentage
    sns.barplot(x='continent', y='pop_affected', hue='continent', data=data, ax=panel)
    sns.lineplot(x='continent', y='pop_affected', data=data, ax=panel)
    sns.scatterplot(x='continent', y='pop_affected', data=data, ax=panel)

    panel.set_title(str(plot_year))
    panel.set_xlabel('Continent')
    panel.set_ylabel('Population Affected (%)')

    # Drop the per-panel legend when one exists (get_legend() may return None)
    legend = panel.get_legend()
    if legend is not None:
        legend.remove()

# Handles and labels of the last panel feed the single figure-level legend
handles, labels = axes[1, 1].get_legend_handles_labels()

# One legend for the whole figure, skipped when there is nothing to show
if handles:
    fig.legend(handles, labels, loc='upper right', title='Continent', bbox_to_anchor=(1.2, 1), facecolor='lightgrey', fontsize='medium', title_fontsize='large')

plt.tight_layout()
plt.show()


### Population Dead Percentage among the affected ones by Continent Over Years


In [None]:

# Deaths as a percentage of cases, per continent, one panel per year
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Dedicated loop name so the module-level `year` list is left alone
for panel, plot_year in zip(axes.flat, continents):
    data = continents[plot_year]

    # Plot missing percentages as 0. The column filled is the one being
    # plotted ('pop_dead'); assigning the result back avoids calling
    # fillna(inplace=True) on a column selection.
    data['pop_dead'] = data['pop_dead'].fillna(0)

    # Bars, a connecting line and markers for the death percentage
    sns.barplot(x='continent', y='pop_dead', hue='continent', data=data, ax=panel)
    sns.lineplot(x='continent', y='pop_dead', data=data, ax=panel)
    sns.scatterplot(x='continent', y='pop_dead', data=data, ax=panel)

    panel.set_title(str(plot_year))
    panel.set_xlabel('Continent')
    panel.set_ylabel('Deaths Among Cases (%)')

    # Drop the per-panel legend when one exists (get_legend() may return None)
    legend = panel.get_legend()
    if legend is not None:
        legend.remove()

# Handles and labels of the last panel feed the single figure-level legend
handles, labels = axes[1, 1].get_legend_handles_labels()

# One legend for the whole figure, skipped when there is nothing to show
if handles:
    fig.legend(handles, labels, loc='upper right', title='Continent', bbox_to_anchor=(1.2, 1), facecolor='lightgrey', fontsize='medium', title_fontsize='large')

plt.tight_layout()
plt.show()


## Top 10 Countries Data Analysis by Year


In [None]:
years = [2020, 2021, 2022, 2023]  # Years to process

# Top-10 table for each year, keyed by year
country_data = {}
top_measures = ['population', 'cases', 'deaths', 'tests', 'vaccinations']

for selected_year in years:
    # Rows belonging to the current year
    year_data = countries[countries['year'] == selected_year]

    # Per-location maximum of every measure, ranked by cases, ten largest kept.
    # copy() makes the slice an independent frame before columns are added.
    top_countries = (
        year_data.groupby('location', observed=True)[top_measures]
        .max()
        .sort_values(by='cases', ascending=False)
        .reset_index()
        .head(10)
        .copy()
    )

    top_countries['percent_tests'] = (top_countries['tests'] / top_countries['population']) * 100
    top_countries['percent_cases'] = (top_countries['cases'] / top_countries['tests']) * 100
    # Vaccination share gets its own column instead of being written to
    # 'percent_deaths' and immediately overwritten.
    # NOTE(review): expressed relative to population, mirroring percent_tests —
    # confirm this is the intended denominator.
    top_countries['percent_vaccinations'] = (top_countries['vaccinations'] / top_countries['population']) * 100
    # Deaths relative to cases, matching the deaths-per-case ratio used elsewhere in the notebook
    top_countries['percent_deaths'] = (top_countries['deaths'] / top_countries['cases']) * 100

    # Division by zero leaves +/-inf behind; store those as NaN
    top_countries = top_countries.replace([np.inf, -np.inf], np.nan)

    country_data[selected_year] = top_countries

In [None]:
country_data

### Visualization of Top 10 Countries by COVID-19 Cases Over Years


In [None]:
# Top-10 locations by cases, one panel per year
fig, axes = plt.subplots(2, 2, figsize=(15, 8))

for index, plot_year in enumerate([2020, 2021, 2022, 2023]):
    panel = axes[index // 2, index % 2]
    top_countries = country_data[plot_year]

    sns.barplot(x='location', y='cases', data=top_countries, ax=panel)
    panel.set_title(f"Year {plot_year}")

    # Fix the tick positions first, then attach the rotated labels to them.
    # Labelling before the positions are fixed is what triggers matplotlib's
    # fixed-locator warning.
    panel.set_xticks(range(len(top_countries)))
    panel.set_xticklabels(top_countries['location'], rotation=45, ha='right')

    # Abbreviate large counts on the y-axis using the notebook's tick formatter
    panel.yaxis.set_major_formatter(ticker.FuncFormatter(format_tick_label))

plt.tight_layout()
plt.show()

### Visualization of Top 10 Countries by COVID-19 Deaths Over Years

In [None]:
# Deaths for the top-10 locations of each year, one panel per year
fig, axes = plt.subplots(2, 2, figsize=(15, 8))

for index, plot_year in enumerate([2020, 2021, 2022, 2023]):
    panel = axes[index // 2, index % 2]
    top_countries = country_data[plot_year]

    sns.barplot(x='location', y='deaths', data=top_countries, ax=panel)
    panel.set_title(f"Year {plot_year}")

    # Fix the tick positions first, then attach the rotated labels to them.
    # Labelling before the positions are fixed is what triggers matplotlib's
    # fixed-locator warning.
    panel.set_xticks(range(len(top_countries)))
    panel.set_xticklabels(top_countries['location'], rotation=45, ha='right')

    # Abbreviate large counts on the y-axis using the notebook's tick formatter
    panel.yaxis.set_major_formatter(ticker.FuncFormatter(format_tick_label))

plt.tight_layout()
plt.show()