In [None]:
import pandas as pd

# Load the pipe-delimited tabular export into memory as one string.
# Explicit encoding so the Lithuanian characters decode the same way on every
# platform -- assumes the export is UTF-8; TODO confirm against the data source.
with open('gr_open_amzius_lytis_pilietybes-1.plain', 'r', encoding='utf-8') as file:
    data = file.read()

column_names = ['Number', 'Gender', 'Age Range', 'Nationality', 'Year of declaration', 'During the quarter', 'Region code', 'Region name', 'Date']

# splitlines() copes with both '\n' and '\r\n' endings, and blank lines (such as
# the empty string left after a trailing newline) are skipped so they do not
# become junk rows padded with None. Every line of the file, including its first
# one, is still kept as a row; values stay as strings.
rows = [line.split('|') for line in data.splitlines() if line.strip()]
data_frame = pd.DataFrame(rows, columns=column_names)
data_frame.head(20)

In [9]:
# Discard the row labelled 0 (presumably the file's own header line, which was
# parsed as data -- confirm), then renumber the remaining rows starting at 0.
df_dropped = data_frame.drop(labels=0, axis='index')
df_reindexed = df_dropped.reset_index(drop=True)
df_reindexed.head(n=5)

Unnamed: 0,Number,Gender,Age Range,Nationality,Year of declaration,During the quarter,Region code,Region name,Date
0,595554,V,45-54,Lietuvos,2002,K4,13,Vilniaus m. sav.,2024-01-26
1,999037,M,45-54,Lietuvos,1993,K4,29,Šiaulių m. sav.,2024-01-26
2,1492649,V,45-54,Lietuvos,1991,K3,46,Jonavos r. sav.,2024-01-26
3,6980,M,7-17,Lietuvos,2023,K1,13,Vilniaus m. sav.,2024-01-26
4,685708,M,0-6,Lietuvos,2020,K1,29,Šiaulių m. sav.,2024-01-26


In [10]:
import geopandas as gpd

# Read the GeoJSON file with the region polygons into a GeoDataFrame
geojson_path = 'lithuania_admin_level_5_simplified.json'
geo_df = gpd.read_file(geojson_path)

# Preview the first five rows to inspect the available columns
geo_df.head(n=5)


Unnamed: 0,longName,name,id,osmId,geometry
0,Pakruojo rajonas,Pakruojo r. sav.,29,17198,"POLYGON ((23.54957 56.06321, 23.56888 56.06631..."
1,Joniškio rajonas,Joniškio r. sav.,10,17199,"POLYGON ((23.12806 56.20310, 23.10836 56.24020..."
2,Telšių rajonas,Telšių r. sav.,50,17212,"POLYGON ((22.07385 56.09020, 22.11332 56.11342..."
3,Birštono savivaldybė,Birštono sav.,4,240013,"POLYGON ((24.17009 54.53606, 24.12894 54.53802..."
4,Kupiškio rajonas,Kupiškio r. sav.,22,240016,"POLYGON ((25.07893 56.01239, 25.06437 55.99798..."


In [17]:
# Expose the GeoJSON 'name' column as 'Region name' so both frames share a join key.
# Rebinding geo_df (instead of inplace=True) avoids mutating the frame object that
# an earlier cell displayed; geo_df still ends up with the renamed column.
geo_df = geo_df.rename(columns={'name': 'Region name'})

# Merge the two DataFrames on the 'Region name' column. pd.merge defaults to an
# inner join, so rows whose region name has no match on the other side are dropped.
merged_df = pd.merge(geo_df, df_reindexed, on='Region name')

# Preview a random subset drawn from ALL merged rows (the first row is no longer
# excluded). The sample size is capped at the number of available rows so a small
# or empty merge result does not raise, and the fixed seed makes the preview
# reproducible across re-runs.
sample_size = min(5, len(merged_df))  # Change 5 to the number of rows you want to display
random_subset = merged_df.sample(n=sample_size, random_state=0)

# Display the random subset
random_subset

Unnamed: 0,longName,Region name,id,osmId,geometry,Number,Gender,Age Range,Nationality,Year of declaration,During the quarter,Region code,Date
2073664,Panevėžio m. savivaldybė,Panevėžio m. sav.,31,366516,"POLYGON ((24.33405 55.69696, 24.31071 55.69518...",682004,M,0-6,Lietuvos,2022,K4,27,2024-01-26
2862949,Kauno rajono savivaldybė,Kauno r. sav.,15,366521,"POLYGON ((24.13751 54.93607, 24.13056 54.92349...",299677,V,45-54,Kita,2023,K4,52,2024-01-26
2449994,Vilniaus m. savivaldybė,Vilniaus m. sav.,56,366518,"POLYGON ((25.07842 54.59923, 25.06609 54.61973...",406821,M,25-34,Lietuvos,2019,K2,13,2024-01-26
2494192,Vilniaus m. savivaldybė,Vilniaus m. sav.,56,366518,"POLYGON ((25.07842 54.59923, 25.06609 54.61973...",542756,V,55-64,Lietuvos,2022,K1,13,2024-01-26
1170815,Šilutės rajonas,Šilutės r. sav.,46,365578,"POLYGON ((22.02396 55.39332, 21.99844 55.35436...",1969841,M,65-84,Lietuvos,2014,K4,88,2024-01-26


In [None]:
import folium

# Initialize the map centered on Lithuania
m = folium.Map(location=[55.1694, 23.8813], zoom_start=7)  # These coordinates are approximate center of Lithuania

# A choropleth colours each region by ONE numeric value. 'Gender' holds the
# strings 'V'/'M' and cannot drive a colour scale, so count the merged rows per
# region instead.
region_counts = (
    merged_df['Region name']
    .value_counts()
    .rename_axis('Region name')
    .reset_index(name='Record count')
)

# One polygon per region. merged_df repeats each region's geometry on every
# tabular row, which would serialise an enormous GeoJSON; geo_df holds each
# region once. geo_df carries the 'Region name' column after the rename done in
# the merge cell.
region_shapes = geo_df[['Region name', 'geometry']]

# Add the choropleth layer
folium.Choropleth(
    geo_data=region_shapes,
    name='choropleth',
    data=region_counts,
    columns=['Region name', 'Record count'],  # [key column, numeric value column]
    key_on='feature.properties.Region name',  # GeoJSON property holding the region key ('name' was renamed)
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of records per region'
).add_to(m)

# Save the map as a standalone HTML file, then display it inline
m.save('choropleth_map.html')
m
