In [12]:
%%pyspark
df = spark.read.load(path='abfss://global-economy-indicators-data@globaleconomyindicators.dfs.core.windows.net/transformed-data/indicators/part-00000-tid-8109756532729835153-e7d859b9-b48c-4a03-97c5-1c9192507ce4-6-1-c000.csv', format='csv',header=True)
display(df.limit (10))   


In [13]:
!pip install opendatasets
import opendatasets as od

In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as pt
import warnings
# Suppress every warning category from here on (no category/module filter is given),
# which keeps cell output clean but also hides deprecation and dtype warnings.
warnings.filterwarnings("ignore")

In [15]:
import pandas as pd
import os

In [16]:
# Label for this analysis; it is not read by any of the cells shown in this notebook section.
project_name = "Global_Economy_Indicators"

In [17]:
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Notebook-wide chart defaults: seaborn grid theme first, then matplotlib overrides.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (9, 5),
    'figure.facecolor': '#00000000',  # RGBA hex with alpha 00 -> transparent figure background
})

<mark>Calculate the total exports of goods and services for each country using Python and Matplotlib</mark>

In [18]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_total_exports(df, title="Total Exports of Goods and Services by Country", top_n=None):
    """Sums exports per country and plots the totals as a bar chart, largest first.

    Args:
        df: Pandas DataFrame with 'Country' and 'Exports_of_goods_and_services' columns.
            Export values may be numeric or numeric strings (for example when the
            CSV was loaded without schema inference); values that cannot be parsed
            as numbers are treated as missing and do not contribute to the totals.
            The input frame is not modified.
        title: Title for the plot.
        top_n: Optional positive integer. When given, only the ``top_n`` countries
            with the largest totals are drawn; ``None`` (the default) draws all of them.

    Returns:
        None. Displays the plot.  Prints an error message and returns if the input is invalid.
        Any exception raised while aggregating or plotting is caught and printed as well.
    """

    if not isinstance(df, pd.DataFrame):
        print("Error: Input must be a Pandas DataFrame.")
        return

    if 'Exports_of_goods_and_services' not in df.columns or 'Country' not in df.columns:
        print("Error: DataFrame must contain 'Country' and 'Exports_of_goods_and_services' columns.")
        return

    try:
        # Work on a two-column copy so the caller's DataFrame is never mutated.
        exports_by_row = df[['Country']].copy()
        # Coerce before summing: on object/string columns `.sum()` concatenates the
        # text ('1000' + '1200' -> '10001200') instead of adding the numbers.
        exports_by_row['Exports_of_goods_and_services'] = pd.to_numeric(
            df['Exports_of_goods_and_services'], errors='coerce'
        )

        total_exports = exports_by_row.groupby('Country')['Exports_of_goods_and_services'].sum().reset_index()
        total_exports = total_exports.sort_values('Exports_of_goods_and_services', ascending=False)

        # Optionally keep only the largest exporters; a bar per country is unreadable
        # once the frame holds a couple of hundred countries.
        if top_n is not None:
            total_exports = total_exports.head(top_n)

        plt.figure(figsize=(12, 6))  # Wider than the notebook default so labels fit
        plt.bar(total_exports['Country'], total_exports['Exports_of_goods_and_services'])
        plt.xlabel("Country", fontsize=12)
        plt.ylabel("Total Exports", fontsize=12)
        plt.title(title, fontsize=14)
        plt.xticks(rotation=45, ha='right', fontsize=10)  # Rotate x-axis labels for readability
        plt.tight_layout()  # Adjust layout to prevent labels from overlapping
        plt.show()

    except Exception as e:
        # Best-effort notebook helper: report the problem instead of aborting the cell.
        print(f"An error occurred during calculation or plotting: {e}")
        return


# --- Demo 1: chart a CSV of your own -----------------------------------------
# "your_data.csv" is a placeholder; until it points at a real file this branch
# simply reports that the file is missing.
try:
    df = pd.read_csv("your_data.csv")
    plot_total_exports(df)  # default title
    plot_total_exports(df, title="Global Exports by Country (USD)")  # custom title
except FileNotFoundError:
    print("Error: CSV file not found. Please provide a valid file path.")
except Exception as err:
    print(f"An error occurred: {err}")

# --- Demo 2: small in-memory data set ----------------------------------------
# USA appears twice on purpose so the per-country summation is visible.
data = [
    {'Country': country, 'Exports_of_goods_and_services': exports}
    for country, exports in (
        ('USA', 1000),
        ('Canada', 500),
        ('USA', 1200),
        ('Mexico', 700),
        ('Japan', 900),
        ('Germany', 1100),
        ('UK', 800),
        ('France', 600),
        ('China', 1300),
    )
]

df_sample = pd.DataFrame(data)
plot_total_exports(df_sample, title="Sample Exports Data")

# --- Demo 3: invalid inputs ---------------------------------------------------
# Both frames lack a required column, so the function prints its error message
# and returns without plotting.
empty_df = pd.DataFrame()
plot_total_exports(empty_df)

invalid_df = pd.DataFrame({'Country': ['USA', 'Canada'], 'SomeOtherColumn': [1, 2]})
plot_total_exports(invalid_df)


<mark>Show each country's construction year using Seaborn</mark>

In [19]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_construction_years(df, title="Construction Years by Country"):
    """Draw a Seaborn count plot of rows per country, coloured by construction value.

    Every bar counts the rows belonging to one 'Country'; the hue splits those
    bars by the distinct values found in 'Construction_ISIC_F'.

    Args:
        df: Pandas DataFrame holding 'Country' and 'Construction_ISIC_F' columns.
        title: Heading shown above the chart.

    Returns:
        None. The chart is displayed as a side effect. Invalid input (wrong type
        or missing column) and plotting failures are reported with print().
    """
    # NOTE(review): the hue treats 'Construction_ISIC_F' as a categorical year;
    # confirm the real column holds years rather than a continuous measure.
    required_columns = ('Construction_ISIC_F', 'Country')

    if not isinstance(df, pd.DataFrame):
        print("Error: Input must be a Pandas DataFrame.")
        return

    if not all(column in df.columns for column in required_columns):
        print("Error: DataFrame must contain 'Country' and 'Construction_ISIC_F' columns.")
        return

    try:
        plt.figure(figsize=(12, 6))
        # countplot tallies rows per category, so no aggregation is needed beforehand.
        sns.countplot(data=df, x='Country', hue='Construction_ISIC_F')
        plt.title(title, fontsize=14)
        plt.xlabel("Country", fontsize=12)
        plt.ylabel("Count", fontsize=12)
        plt.xticks(rotation=45, ha='right', fontsize=10)
        plt.tight_layout()
        plt.show()
    except Exception as plot_error:
        print(f"An error occurred during plotting: {plot_error}")
        return


# --- Demo 1: chart a CSV of your own -----------------------------------------
# "your_data.csv" is a placeholder; until it points at a real file this branch
# simply reports that the file is missing.
try:
    df = pd.read_csv("your_data.csv")
    plot_construction_years(df)  # default title
    plot_construction_years(df, title="Construction Activity by Country")  # custom title
except FileNotFoundError:
    print("Error: CSV file not found. Please provide a valid file path.")
except Exception as err:
    print(f"An error occurred: {err}")


# --- Demo 2: small in-memory data set ----------------------------------------
# Several countries repeat with different values so the hue split is visible.
data = [
    {'Country': country, 'Construction_ISIC_F': construction_value}
    for country, construction_value in (
        ('USA', 2020),
        ('Canada', 2020),
        ('USA', 2021),
        ('Mexico', 2020),
        ('USA', 2022),
        ('Canada', 2021),
        ('Mexico', 2021),
        ('Japan', 2022),
        ('Germany', 2021),
        ('UK', 2020),
        ('France', 2022),
        ('China', 2020),
    )
]

df_sample = pd.DataFrame(data)
plot_construction_years(df_sample, title="Sample Construction Data")


# --- Demo 3: invalid inputs ---------------------------------------------------
# Both frames lack a required column, so the function prints its error message
# and returns without plotting.
empty_df = pd.DataFrame()
plot_construction_years(empty_df)

invalid_df = pd.DataFrame({'Country': ['USA', 'Canada'], 'SomeOtherColumn': [1, 2]})
plot_construction_years(invalid_df)

<mark>Show population growth by country over the last 50 years using a line chart</mark>

In [20]:
import pandas as pd
import matplotlib.pyplot as plt

# Illustrative figures only, in millions of people (REPLACE THIS WITH YOUR ACTUAL DATA).
# One list per decade; positions line up with the 'Country' list.
data = {
    'Country': ['USA', 'Canada', 'UK', 'Germany', 'Japan'],
    '1973': [210, 22, 56, 78, 108],
    '1983': [235, 25, 58, 80, 110],
    '1993': [260, 28, 60, 82, 115],
    '2003': [285, 32, 63, 83, 120],
    '2013': [310, 35, 65, 84, 125],
    '2023': [335, 38, 68, 83, 125],
}

# Wide layout: countries as the row index, decades as columns.
df = pd.DataFrame(data).set_index('Country')

# Flip it so each row is a year and each column a country; this is the shape
# needed to draw one line per country along a time axis.
df_transposed = df.T

plt.figure(figsize=(12, 6))
# The year labels are strings, so matplotlib spaces them evenly as categories.
for country in df_transposed.columns:
    plt.plot(df_transposed.index, df_transposed[country], label=country)

plt.title('Population Growth Over 50 Years')
plt.xlabel('Year')
plt.ylabel('Population (Millions)')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()



<mark>Show countries by gross national income (USD) and imports of goods and services for the year 2020 using a histogram</mark>


In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # For nicer plots

# Illustrative figures only (REPLACE THIS WITH YOUR ACTUAL DATA).
# One row per country once loaded into the DataFrame.
data = {
    'Country': ['USA', 'Canada', 'UK', 'Germany', 'Japan', 'France', 'Australia', 'Brazil', 'India', 'China'],
    'GNI': [65000, 50000, 45000, 52000, 48000, 47000, 55000, 15000, 2000, 12000],
    'Imports': [25000, 18000, 20000, 22000, 19000, 17000, 21000, 8000, 1000, 5000],
}
df = pd.DataFrame(data)

# Axis captions shared by the scatter plot and the 2D histogram.
x_label = 'Gross National Income (USD)'
y_label = 'Imports of Goods and Services (USD)'

# View 1: scatter plot, the clearest way to relate two numeric variables.
# Each country gets its own colour (hue); s sets the marker size.
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='GNI', y='Imports', hue='Country', s=50)
plt.title('GNI vs. Imports (2020)')
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.grid(True)
plt.tight_layout()
# Optional: to move the legend outside the axes, call
# plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) before plt.show().
plt.show()

# View 2: 2D histogram, showing how many countries fall in each GNI/imports cell.
plt.figure(figsize=(10, 6))
plt.hist2d(df['GNI'], df['Imports'], bins=(10, 10), cmap='viridis')  # 10 x 10 grid of cells
plt.colorbar(label='Number of Countries')
plt.title('2D Histogram of GNI and Imports (2020)')
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.tight_layout()
plt.show()


# View 3: one histogram per variable, side by side, each with a KDE overlay.
plt.figure(figsize=(12, 5))

for position, column in enumerate(('GNI', 'Imports'), start=1):
    plt.subplot(1, 2, position)  # 1 row, 2 columns, panel number `position`
    sns.histplot(df[column], kde=True)
    plt.title(f'Distribution of {column}')

plt.tight_layout()
plt.show()



<mark>Show the maximum GDP per country over the last 50 years using a bar chart</mark>

In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # For nicer plots

# Illustrative figures only (REPLACE THIS WITH YOUR ACTUAL DATA).
# One list per decade; positions line up with the 'Country' list.
data = {
    'Country': ['USA', 'Canada', 'UK', 'Germany', 'Japan', 'France', 'Australia', 'Brazil', 'India', 'China'],
    '1973': [10000, 4000, 5000, 6000, 7000, 5500, 4500, 2000, 800, 1500],
    '1983': [12000, 4500, 5500, 7000, 8000, 6000, 5000, 2500, 1000, 2000],
    '1993': [15000, 5000, 6000, 8000, 9000, 7000, 6000, 3000, 1200, 3000],
    '2003': [18000, 6000, 7000, 9000, 10000, 8000, 7000, 4000, 1500, 5000],
    '2013': [22000, 7000, 8000, 10000, 11000, 9000, 8000, 5000, 2000, 8000],
    '2023': [25000, 8000, 9000, 11000, 12000, 10000, 9000, 6000, 2500, 12000],
}
# Countries become the row index so that only the year columns hold GDP values.
df = pd.DataFrame(data).set_index('Country')

# Row-wise maximum: the highest GDP each country reached in any listed year.
max_gdp = df.max(axis=1)

plt.figure(figsize=(12, 6))
# Seaborn draws one bar per country, in the order the countries appear in the index.
sns.barplot(x=max_gdp.index, y=max_gdp.values)
plt.title('Maximum GDP per Country (Last 50 Years)')
plt.xlabel('Country')
plt.ylabel('Maximum GDP')
plt.xticks(rotation=45, ha='right')  # tilt the country names so they do not overlap
plt.tight_layout()
plt.show()