In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import seaborn as sns


In [2]:
# Location of the investment tracker workbook. The default is the original
# local path; set the CHINA_INVESTMENT_DATA environment variable to run the
# notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    'CHINA_INVESTMENT_DATA',
    '/Users/ranggaakhli/Documents/China-global-investment-tracker.xlsx',
)

# Load investment data
investment_data = pd.read_excel(DATA_PATH)

# Group investment data by year, sum the quantities
# (groupby sorts by 'Year', so the rows below are in chronological order)
investment_by_year = investment_data.groupby('Year')['Quantity in Millions'].sum().reset_index()

# Create line chart to compare total investments over the years
fig = px.line(investment_by_year, x='Year', y='Quantity in Millions',
              title="China's Total Global Investment Trends Over Time",
              labels={'Year': 'Year', 'Quantity in Millions': 'Total Investment (Millions)'},
              markers=True,  # Show markers at data points
              template='plotly')  # Default Plotly template

# Add a smoothed trend line: rolling mean over a 5-row window (one row per
# year present in the data). This is NOT a regression fit. The first four
# points are NaN because their window is incomplete, so the line starts at
# the fifth year.
fig.add_scatter(x=investment_by_year['Year'],
                y=investment_by_year['Quantity in Millions'].rolling(window=5).mean(),
                mode='lines',
                name='Trend Line')

# Update layout for better visualization
fig.update_layout(
    xaxis=dict(tickmode='linear'),  # Evenly spaced ticks instead of auto-selected ones
    yaxis=dict(tickformat=',.0f'),  # Format y-axis ticks as comma-separated integers
    # Horizontal legend anchored just above the plot area, right-aligned
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    title_font=dict(size=24),  # Increase title font size
    font=dict(size=14),  # Increase general font size
    title_x=0.5,  # Center the title
)

# Add source and visualization credit below the plot area (paper coordinates)
fig.add_annotation(text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli", xref="paper", yref="paper", x=0.5, y=-0.3, showarrow=False)

fig.show()


In [3]:
# Keep a copy of the untouched sector labels the first time this cell runs.
# The ranking below always reads this column, so re-running the cell gives the
# same result instead of folding one more sector into 'Other' on each run
# (which happened when the ranking was computed from the already-collapsed
# 'Sector' column).
if 'Sector (raw)' not in investment_data.columns:
    investment_data['Sector (raw)'] = investment_data['Sector']

# Group investment data by original sector and sum the quantities
investment_by_sector = (
    investment_data.groupby('Sector (raw)')['Quantity in Millions']
    .sum()
    .rename_axis('Sector')
    .reset_index()
)

# Sort sectors by investment amount and get the top 8
top_8_sectors = investment_by_sector.nlargest(8, 'Quantity in Millions')['Sector'].tolist()

# Replace sectors not in the top 8 with 'Other'.
# NOTE: this intentionally still overwrites investment_data['Sector'], because
# later cells group by the collapsed labels.
investment_data['Sector'] = investment_data['Sector (raw)'].where(
    investment_data['Sector (raw)'].isin(top_8_sectors), 'Other'
)

# Group investment data by modified sector and sum the quantities
investment_by_sector_modified = investment_data.groupby('Sector')['Quantity in Millions'].sum().reset_index()

# Order slices from largest to smallest, with 'Other' always last
investment_by_sector_modified = (
    investment_by_sector_modified
    .assign(_is_other=investment_by_sector_modified['Sector'].eq('Other'))
    .sort_values(by=['_is_other', 'Quantity in Millions'], ascending=[True, False])
    .drop(columns='_is_other')
)

# Create donut chart to visualize the distribution of investments by sector
fig = px.pie(investment_by_sector_modified, values='Quantity in Millions', names='Sector',
             title='Distribution of Investments by Sector',
             hole=0.4)  # Set hole parameter to create a donut chart

# Pie traces re-sort slices by value by default, which would undo the row
# order chosen above; turn that off so 'Other' really is drawn last.
fig.update_traces(sort=False)

# Add source and visualization credit
fig.add_annotation(text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli", xref="paper", yref="paper", x=0.5, y=-0.3, showarrow=False)

fig.show()


In [4]:
# Total investment per country, returned as a flat two-column frame
investment_by_country = (
    investment_data
    .groupby('Country', as_index=False)['Quantity in Millions']
    .sum()
)

# Single-level treemap: one tile per country, sized by total investment
fig = px.treemap(
    investment_by_country,
    path=['Country'],
    values='Quantity in Millions',
    title='Composition of Investments by Country',
)

# Source and visualization credit, placed below the chart in paper coordinates
fig.add_annotation(
    text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli",
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.3,
    showarrow=False,
)

fig.show()


In [5]:
# Group investment data by sector, subsector, and country, sum the quantities.
# groupby() drops rows whose key is NaN by default, so a row without a
# subsector would silently vanish from the totals. Label such rows explicitly
# instead. The fill is applied to a derived frame; investment_data itself is
# left untouched. If no subsector is missing this is a no-op.
investment_by_sector_subsector_country = (
    investment_data
    .assign(Subsector=lambda df: df['Subsector'].fillna('Unspecified'))
    .groupby(['Sector', 'Subsector', 'Country'])['Quantity in Millions']
    .sum()
    .reset_index()
)

# Create treemap to visualize the composition of investments by sector, subsector, and country
fig = px.treemap(investment_by_sector_subsector_country,
                 path=['Sector', 'Subsector', 'Country'],
                 values='Quantity in Millions',
                 title='Composition of Investments by Sector, Subsector, and Country')

# Add source and visualization credit
fig.add_annotation(text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli", xref="paper", yref="paper", x=0.5, y=-0.3, showarrow=False)

fig.show()



In [6]:
# Total investment per country (recomputed here so the cell does not depend
# on the treemap cell having been run)
investment_by_country = (
    investment_data
    .groupby('Country', as_index=False)['Quantity in Millions']
    .sum()
)

# World choropleth: countries matched by name, shaded by total investment
fig = px.choropleth(
    investment_by_country,
    locations='Country',
    locationmode='country names',
    color='Quantity in Millions',
    color_continuous_scale='Deep',
    # Colour range runs from zero up to the largest country total
    range_color=(0, investment_by_country['Quantity in Millions'].max()),
    hover_name='Country',
    hover_data={'Quantity in Millions': True},
    title='Distribution Map of China Global Investments by Country',
    labels={'Quantity in Millions': 'Investment (Millions)'},
)

# Use an equirectangular map projection
fig.update_geos(projection_type="equirectangular")

# Title shown above the colour bar
fig.update_layout(coloraxis={'colorbar': {'title': 'Investment (Millions)'}})

# Source and visualization credit, placed below the map in paper coordinates
fig.add_annotation(
    text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli",
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.3,
    showarrow=False,
)

fig.show()


In [7]:
# Group investment data by sector, region, and sum the quantities
investment_by_sector_region = investment_data.groupby(['Sector', 'Region'])['Quantity in Millions'].sum().reset_index()

# Keep the top 3 sectors by investment amount in each region.
# Sorting and then taking head(3) per group keeps the 'Region' column as an
# ordinary column. The previous groupby().apply(nlargest) followed by
# reset_index(drop=True) relied on apply() passing the grouping column through,
# which pandas has deprecated.
top_sectors_by_region = (
    investment_by_sector_region
    .sort_values(by=['Region', 'Quantity in Millions'], ascending=[True, False])
    .groupby('Region')
    .head(3)
    .reset_index(drop=True)
)

# Create a bubble chart to visualize the top 3 sectors by investment amount in each region
fig = px.scatter(top_sectors_by_region, x='Region', y='Sector', size='Quantity in Millions',
                 color='Sector', hover_name='Sector',
                 title='Top 3 Sectors by Investment Amount in Each Region',
                 # Labels feed both the axis titles and the hover text, so they
                 # must describe the data rather than carry the credit line.
                 labels={'Region': 'Region', 'Sector': 'Sector', 'Quantity in Millions': 'Investment (Millions)'},
                 size_max=50)

# Add source and visualization credit as an annotation, like the other charts
fig.add_annotation(text="Source: American Enterprise Institute | Visualization: https://medium.com/@ranggaakhli", xref="paper", yref="paper", x=0.5, y=-0.3, showarrow=False)

fig.show()
