In [1]:
import pandas as pd
import altair as alt

# Define file paths (relative to the notebook's working directory).
csv_file = 'data/happiness_report.csv'
semicolon_file = 'data/happiness_report_semicolon.csv'
metadata_file = 'data/happiness_report_metadata.csv'
excel_file = 'data/happiness_report.xlsx'
no_header_file = 'data/happiness_report_no_header.csv'

# ----------------------------------------------------------------------
# 1. Load datasets
# ----------------------------------------------------------------------
# Section headings are written as `#` comments on purpose: a bare string
# literal that ends up as the last expression of a cell is echoed by
# Jupyter as the cell's Out[] value, which clutters the output.

# Plain comma-separated file; the first row is the header.
df_csv = pd.read_csv(csv_file)
print("CSV File:")
print(df_csv.head())

# Fields are separated by ';' and numbers use ',' as the decimal mark.
df_semicolon = pd.read_csv(semicolon_file, sep=';', decimal=',')
print("\nSemicolon-Separated CSV File:")
print(df_semicolon.head())

# Skip the first two lines of the file (presumably free-text metadata --
# TODO confirm against the file) and use the next line as the header.
df_metadata = pd.read_csv(metadata_file, sep=',', skiprows=2, header=0)
print("\nMetadata CSV File:")
print(df_metadata.head())

# Excel workbook; read_excel's default sheet selection is used.
df_excel = pd.read_excel(excel_file)
print("\nExcel File:")
print(df_excel.head())

# The file has no header row, so column names are supplied explicitly.
# Note these names are all lower-case ('gdp_per_capita'), unlike the
# 'GDP_per_capita' column printed for the other files.
headers = ['country', 'happiness_score', 'gdp_per_capita', 'life_expectancy', 'freedom']
happy_header = pd.read_csv(no_header_file, header=None, names=headers)
print("\nCSV Without Header (After Adding Headers):")
print(happy_header.head())


# ----------------------------------------------------------------------
# 2. Visualization (charts are built in the following cells)
# ----------------------------------------------------------------------



CSV File:
       country  happiness_score  GDP_per_capita  life_expectancy   freedom
0       Norway            7.537        1.616463         0.796667  0.635423
1      Denmark            7.522        1.482383         0.792566  0.626007
2      Iceland            7.504        1.480633         0.833552  0.627163
3  Switzerland            7.494        1.564980         0.858131  0.620071
4      Finland            7.469        1.443572         0.809158  0.617951

Semicolon-Separated CSV File:
       country  happiness_score  GDP_per_capita  life_expectancy   freedom
0       Norway            7.537        1.616463         0.796667  0.635423
1      Denmark            7.522        1.482383         0.792566  0.626007
2      Iceland            7.504        1.480633         0.833552  0.627163
3  Switzerland            7.494        1.564980         0.858131  0.620071
4      Finland            7.469        1.443572         0.809158  0.617951

Metadata CSV File:
       country  happiness_score  GDP_pe

'\n2. Visualization\n'

In [2]:
# A) Scatter plot built from happy_header:
#    GDP per capita on the x-axis, life expectancy on the y-axis,
#    one point per row.
scatter = (
    alt.Chart(happy_header, title='Life Expectancy vs. GDP per Capita')
    .mark_point()
    .encode(
        alt.X('gdp_per_capita', title='GDP per Capita'),
        alt.Y('life_expectancy', title='Life Expectancy'),
    )
)

# Bare last expression so the notebook renders the chart.
scatter


In [3]:
# B) Histogram from df_metadata
#    Shows how happiness_score is distributed across all rows of
#    df_metadata: scores are grouped into at most 20 bins over the fixed
#    range [2, 8], and each bar's height is the number of rows in that bin.
histogram = alt.Chart(df_metadata).mark_bar().encode(
    alt.X(
        'happiness_score:Q',
        bin=alt.Bin(maxbins=20, extent=[2, 8]),
        title='Happiness Score (binned)'
    ),
    alt.Y('count()', title='Count of Records')
).properties(
    title='Distribution of Happiness Scores'
)

# Bare last expression so the notebook renders the chart.
histogram