In [18]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load the world statistics table from its CSV export.
df = pd.read_csv('world-data-2023.csv')

# Cast the column named 'non_string_column' to str, but only when the file
# really contains a column with that exact name; otherwise this is a no-op.
if df.columns.isin(['non_string_column']).any():
    df['non_string_column'] = df['non_string_column'].astype(str)


def is_string_column(column):
    """Return True when ``column`` holds text data.

    Parameters
    ----------
    column : pandas.Series
        A single DataFrame column (any object exposing a ``dtype`` attribute).

    Returns
    -------
    bool
        True for the generic ``object`` dtype and for pandas' dedicated
        string dtype (``pd.StringDtype``); False for everything else.
    """
    dtype = column.dtype
    # Text columns are ``object`` by default, but may also be stored with the
    # dedicated string dtype; accept both so that no text column is skipped
    # when the ``.str`` accessor is applied later.
    return bool(dtype == 'object') or isinstance(dtype, pd.StringDtype)

# Pick out the text columns; only those support the ``.str`` accessor.
string_columns = df.columns[df.apply(is_string_column)]

# Flag every row in which any text column contains the character pair "¿½"
# (presumably residue of mis-encoded text -- confirm against the raw file).
# ``na=False`` makes missing / non-string cells count as "no match", so the
# mask is strictly boolean and safe to negate with ``~``; ``regex=False``
# matches the marker literally instead of compiling it as a pattern.
mask = df[string_columns].apply(
    lambda x: x.str.contains('¿½', na=False, regex=False)
).any(axis=1)

# Keep only the rows that are free of the marker.
df = df[~mask]

# Drop every remaining row that has a NaN in any column.
df = df.dropna()


# Parse 'GDP' as a number. Casting to str first keeps the ``.str`` accessor
# valid whatever dtype pandas inferred; then the '$' sign and the thousands
# separators are stripped before the numeric conversion.
df['GDP'] = pd.to_numeric(
    df['GDP'].astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

# Base-10 logarithm of GDP, used as the plotted quantity.
df['Log_GDP'] = np.log10(df['GDP'])

# Parse 'Density' the same way as 'GDP'. The explicit str cast matters: if no
# value in the file contains a comma, pandas reads the column as numeric and
# calling ``.str`` on it directly would raise AttributeError.
df['Density'] = pd.to_numeric(
    df['Density'].astype(str).str.replace(',', '', regex=False)
)

# Histogram of log10(GDP) over the cleaned rows (nbins=20).
fig = px.histogram(
    df,
    x='Log_GDP',
    nbins=20,
    title='Distribution of Logarithmic GDP',
    labels={'Log_GDP': 'Log GDP (base 10)', 'count': 'Frequency'},
)

# Axis captions, plus a small gap so neighbouring bars stay distinguishable.
fig.update_layout(
    xaxis={'title': 'Log GDP (base 10)'},
    yaxis={'title': 'Frequency'},
    bargap=0.05,
)

# Export a static SVG copy of the figure to the working directory.
fig.write_image("heap.svg")

# Render the interactive figure in the notebook output.
fig.show()


In [17]:
import plotly.express as px

# Draw 20 rows per country (with replacement) for plotting.
# ``DataFrameGroupBy.sample`` is used instead of ``groupby().apply(...)``:
# applying a function to the grouping column is deprecated in recent pandas,
# and once that column is excluded, ``reset_index(drop=True)`` would discard
# 'Country' entirely and break ``color='Country'`` in the scatter plot.
# The fixed ``random_state`` makes the sample reproducible across re-runs.
# NOTE(review): with replace=True, any country that has fewer than 20 rows is
# upsampled with duplicate rows -- confirm this is the intended behaviour.
sampled_data = (
    df.groupby('Country')
    .sample(n=20, replace=True, random_state=42)
    .reset_index(drop=True)
)

# Scatter of log10(GDP) against population density, one colour per country.
fig = px.scatter(
    sampled_data,
    x='Density',
    y='Log_GDP',
    color='Country',
    hover_data={'Country': True, 'Density': True, 'GDP': True},
    labels={
        'Density': 'Population Density',
        'Log_GDP': 'Logarithmic GDP (base 10)',
        'Country': 'Country',
    },
    title='Logarithmic GDP vs. Population Density',
    width=1200,
    height=600,
)

# Uniform, slightly transparent markers so overlapping points remain visible.
fig.update_traces(marker={'size': 10, 'opacity': 0.7})

# Boxed vertical legend anchored at the right edge, vertically centred;
# a single click isolates a country, a double click toggles it.
fig.update_layout(
    legend_title_text='Country',
    legend={
        'orientation': "v",
        'yanchor': "middle",
        'y': 0.5,
        'xanchor': "right",
        'x': 0.99,
        'bordercolor': "Black",
        'borderwidth': 2,
        'bgcolor': "white",
        'traceorder': "normal",
        'itemsizing': 'trace',
        'itemclick': "toggleothers",
        'itemdoubleclick': "toggle",
    },
    # Fixed margins on every side to avoid overlap.
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
)

# Export a static SVG copy of the figure to the working directory.
fig.write_image("scatter.svg")

# Render the interactive figure in the notebook output.
fig.show()
