In [27]:
import sys
sys.path.append("/Users/kim/Desktop/repos/Philippines_Visitor")

import pandas as pd
from db_conn import get_engine

query = """
SELECT *
FROM (
    SELECT 
        c.country,
        y.year,
        y.total_visitors,
        RANK() OVER (
            PARTITION BY y.year 
            ORDER BY y.total_visitors DESC
        ) AS rank
    from yearly_visitors y
    JOIN countries_list c
        ON y.country_id = c.country_id
) AS ranked
WHERE rank <= 10
ORDER BY year, rank;
"""

def get_top10_per_year():
    engine = get_engine()
    df = pd.read_sql(query, engine)
    return df


top_10 = get_top10_per_year()
top_10.to_csv("Top10countries.csv")

In [26]:
top_10
# Count how many times each country appears
country_counts = top_10['country'].value_counts()

print(country_counts)

country
South Korea             11
Usa                     11
Japan                   11
China                   11
Canada                  11
United Kingdom          11
Australia               10
Singapore                9
Taiwan                   9
Malaysia                 9
India                    3
Germany                  1
Residences               1
Turkey                   1
United Arab Emirates     1
Name: count, dtype: int64


In [18]:
years = top_10['year'].unique()
countries = top_10['country'].unique()

full_index = pd.MultiIndex.from_product([years, countries], names=['year', 'country'])
df_full = top_10.set_index(['year', 'country']).reindex(full_index).reset_index()
df_full['total_visitors'] = df_full['total_visitors'].fillna(0)

df_full.to_csv("Full_TOP10.csv")

In [10]:
transposed = top_10.pivot_table(
        index="country",
        columns='year',
        values='total_visitors'
)

transposed.to_csv("TOP10_transposed.csv")
transposed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, Australia to Usa
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2015    10 non-null     float64
 1   2016    10 non-null     float64
 2   2017    10 non-null     float64
 3   2018    10 non-null     float64
 4   2019    10 non-null     float64
 5   2020    10 non-null     float64
 6   2021    10 non-null     float64
 7   2022    10 non-null     float64
 8   2023    10 non-null     float64
 9   2024    10 non-null     float64
 10  2025    10 non-null     float64
dtypes: float64(11)
memory usage: 1.4+ KB


In [3]:
df_interpolated = transposed.interpolate(axis=1)  # interpolate along years
df_interpolated

country,Australia,Canada,China,Germany,India,Japan,Malaysia,Residences,Singapore,South Korea,Taiwan,Turkey,United Arab Emirates,United Kingdom,Usa
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015,241187.0,156363.0,490841.0,492448.0,494055.0,495662.0,155814.0,168495.0,181176.0,1339678.0,177670.0,169976.333333,162282.666667,154589.0,779217.0
2016,251098.0,175631.0,675663.0,628854.7,582046.3,535238.0,139133.0,157595.0,176057.0,1475081.0,229303.0,210635.0,191967.0,173299.0,869463.0
2017,259433.0,200640.0,968447.0,840358.0,712269.0,584180.0,143566.0,156101.5,168637.0,1607821.0,236777.0,218754.0,200731.0,182708.0,957813.0
2018,279828.0,226446.0,1257962.0,1049248.0,840534.7,631821.0,145246.0,158521.0,171796.0,1624251.0,242411.0,228622.0,214833.0,201044.0,1034471.0
2019,286170.0,238850.0,1743309.0,1389802.0,1036295.0,682788.0,139882.0,149238.5,158595.0,1989322.0,327273.0,287917.333333,248561.666667,209206.0,1064440.0
2020,55330.0,55273.0,170432.0,25893.0,29014.0,136664.0,187217.25,237770.5,288323.75,338877.0,48644.0,45756.0,42868.0,39980.0,211816.0
2021,,6781.0,9674.0,8438.0,7202.0,15024.0,11796.0,8568.0,7512.0,6456.0,5415.0,4374.0,2733.0,4348.0,39326.0
2022,137974.0,121413.0,39627.0,45584.5,51542.0,99557.0,46805.0,50126.5,53448.0,428014.0,346269.0,264524.0,182779.0,101034.0,505089.0
2023,266551.0,221920.0,263836.0,277750.7,291665.3,305580.0,97639.0,123434.5,149230.0,1450858.0,194851.0,181466.666667,168082.333333,154698.0,903299.0
2024,272215.0,223944.0,312222.0,337586.7,362951.3,388316.0,95713.0,126488.5,157264.0,1569071.0,208736.0,192589.333333,176442.666667,160296.0,947891.0


In [4]:
import bar_chart_race as bcr

bcr.bar_chart_race(
    df=df_interpolated,
    filename='Top 10 Countries Over Time.gif',          # optional: saves video
    title='Top Countries by Visitors Over Time',
    period_fmt='%Y',           # depends on your index format
    steps_per_period=50,
    period_length=500
)

  df_values.iloc[:, 0] = df_values.iloc[:, 0].fillna(method='ffill')
  ax.set_yticklabels(self.df_values.columns)
  ax.set_xticklabels([max_val] * len(ax.get_xticks()))
MovieWriter imagemagick unavailable; using Pillow instead.
