In [14]:
import pandas as pd
import numpy as np

In [15]:
# Sample dataset: one record per (country, year) observation.
# Units for Population/GDP are not stated in this notebook.
COLUMN_NAMES = ('Country', 'Year', 'Population', 'GDP', 'Life_Expectancy')
ROWS = [
    ('USA',     2020,   331_000_000, 21_000_000, 78.5),
    ('USA',     2021,   332_000_000, 21_500_000, 78.7),
    ('China',   2020, 1_402_000_000, 15_500_000, 76.9),
    ('China',   2021, 1_405_000_000, 16_000_000, 77.1),
    ('Germany', 2020,    83_000_000,  4_000_000, 81.3),
    ('Germany', 2021,    83_100_000,  4_200_000, 81.5),
]

# Transpose the row records into a column-name -> list-of-values mapping,
# which is the shape pd.DataFrame consumes below.
data = {
    column: list(values)
    for column, values in zip(COLUMN_NAMES, zip(*ROWS))
}

# Build the DataFrame; column order follows the key order of `data`.
df = pd.DataFrame(data)

# Print the frame to verify its structure.
print(df)

   Country  Year  Population       GDP  Life_Expectancy
0      USA  2020   331000000  21000000             78.5
1      USA  2021   332000000  21500000             78.7
2    China  2020  1402000000  15500000             76.9
3    China  2021  1405000000  16000000             77.1
4  Germany  2020    83000000   4000000             81.3
5  Germany  2021    83100000   4200000             81.5


In [16]:
# ================== Summary statistics via pandas methods ==================
print("\nUsing Pandas:")

# Each entry pairs the label to print with the pandas reduction it reports.
# All reductions run over every row of df, i.e. across all countries and years.
pandas_summary = [
    ("Mean Population:\n", df['Population'].mean()),
    ("Total GDP:\n", df['GDP'].sum()),
    ("Max Life Expectancy:\n", df['Life_Expectancy'].max()),
    ("Min Life Expectancy:\n", df['Life_Expectancy'].min()),
]
for label, value in pandas_summary:
    print(label, value)

# ================== Summary statistics via NumPy functions ==================
print("\nUsing NumPy:")

# The same kind of reductions, this time through NumPy's top-level functions
# applied directly to the DataFrame columns.
numpy_summary = [
    ("Mean GDP (NumPy):", np.mean(df['GDP'])),
    # np.std uses ddof=0 (population standard deviation) by default, whereas
    # Series.std() defaults to ddof=1, so the two would not agree.
    ("Standard Deviation of GDP (NumPy):", np.std(df['GDP'])),
    ("Total Population (NumPy):", np.sum(df['Population'])),
]
for label, value in numpy_summary:
    print(label, value)


Using Pandas:
Mean Population:
 606016666.6666666
Total GDP:
 82200000
Max Life Expectancy:
 81.5
Min Life Expectancy:
 76.9

Using NumPy:
Mean GDP (NumPy): 13700000.0
Standard Deviation of GDP (NumPy): 7153087.911291645
Total Population (NumPy): 3636100000


In [17]:
# ------------------ Vectorized Operation to Increase GDP by 10% ------------------
# Multiplier corresponding to a 10% increase.
GDP_GROWTH_FACTOR = 1.10

# Derive the increased GDP from the untouched source values in `data` instead of
# from df['GDP'] itself. With the self-referential form
# `df['GDP'] = df['GDP'] * 1.10`, every re-run of this cell would compound another
# 10% on top of the previous result. Building the Series on df.index keeps the
# assignment aligned whether or not df's index has been changed since creation.
# Assumes df still holds the same rows, in the same order, as `data`.
baseline_gdp = pd.Series(data['GDP'], index=df.index, name='GDP')

# Vectorized operation: one multiplication applied to every row of the column.
df['GDP'] = baseline_gdp * GDP_GROWTH_FACTOR

# Display the updated GDP column after the increase
print("\nUpdated GDP with 10% increase:\n", df[['GDP']])


Updated GDP with 10% increase:
           GDP
0  23100000.0
1  23650000.0
2  17050000.0
3  17600000.0
4   4400000.0
5   4620000.0


In [18]:
# ------------------ Swap Levels of MultiIndex (Country and Year) ------------------
# Columns that form the two-level row index, outermost level first.
INDEX_LEVELS = ['Country', 'Year']

# Promote 'Country' and 'Year' to a MultiIndex. The guard makes this cell safe to
# re-run: once the columns have been moved into the index, calling set_index on
# them again raises KeyError because they no longer exist as columns.
if list(df.index.names) != INDEX_LEVELS:
    df = df.set_index(INDEX_LEVELS)

# Swap the two levels so 'Year' becomes the outer level, then sort the rows.
# Sorting on level 'Year' also orders the remaining level ('Country') within
# each year, which is pandas' default (sort_remaining=True).
df_swapped = df.swaplevel('Country', 'Year').sort_index(level='Year')

# Display the swapped DataFrame
print("\nData after swapping and sorting by 'Year':\n", df_swapped)


Data after swapping and sorting by 'Year':
               Population         GDP  Life_Expectancy
Year Country                                         
2020 China    1402000000  17050000.0             76.9
     Germany    83000000   4400000.0             81.3
     USA       331000000  23100000.0             78.5
2021 China    1405000000  17600000.0             77.1
     Germany    83100000   4620000.0             81.5
     USA       332000000  23650000.0             78.7


In [19]:
# ------------------ Pivot the 'Year' index level into columns ------------------
# Requires df to carry the ('Country', 'Year') MultiIndex. Moving 'Year' out of
# the row index leaves one row per country and, for every measurement column,
# one sub-column per year.
df_unstacked = df.unstack('Year')

# Show the pivoted result.
print("\nData after unstacking (Year becomes columns):\n", df_unstacked)


Data after unstacking (Year becomes columns):
          Population                     GDP             Life_Expectancy      
Year           2020        2021        2020        2021            2020  2021
Country                                                                      
China    1402000000  1405000000  17050000.0  17600000.0            76.9  77.1
Germany    83000000    83100000   4400000.0   4620000.0            81.3  81.5
USA       331000000   332000000  23100000.0  23650000.0            78.5  78.7


In [20]:
# ------------------ Retrieve Data for Specific Countries ------------------
# Countries to report, in the order they are printed. Add a label here instead
# of copy-pasting another lookup.
COUNTRIES_TO_SHOW = ['USA', 'China']

for country in COUNTRIES_TO_SHOW:
    # Requires 'Country' to be the outer level of df's MultiIndex. Selecting by
    # the country label returns that country's 'Population' values as a Series
    # indexed by the remaining level ('Year').
    print(f"\nPopulation of {country}:\n", df.loc[country, 'Population'])


Population of USA:
 Year
2020    331000000
2021    332000000
Name: Population, dtype: int64

Population of China:
 Year
2020    1402000000
2021    1405000000
Name: Population, dtype: int64
