In [12]:
%matplotlib inline
import polars as pl
import matplotlib.pyplot as plt

# Make the graphs a bit prettier, and bigger``
plt.style.use('ggplot')

pl.Config.set_tbl_cols(60)
pl.Config.set_fmt_str_lengths(5000)

plt.rcParams['figure.figsize'] = (15, 5)

# Set Polars to show all rows without truncation
pl.Config.set_tbl_rows(-1)  # -1 means no limit

polars.config.Config

In [13]:
langs = pl.read_csv('data/World_Languages.csv')
langs[0:5]

X,Y,OBJECTID,SOVEREIGNT,TYPE_,COUNTRY,FIRST_OFFI,SECOND_OFF,THIRD_OFFI,ENGLISH_FI,FRENCH_FIR,ARABIC_FIR,SPANISH_FI,PORTUGUESE,CHINESE_FI,RUSSIAN_FI,WORLD_LANG
f64,f64,i64,str,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,str
24.471441,-22.345302,20,"""Botswana""","""Sovereign country""","""Botswana""","""English""",,,1,0,0,0,0,0,0,"""English"""
20.633118,6.614791,21,"""Central African Republic""","""Sovereign country""","""Central African Republic""","""French""","""Sangho""",,0,1,0,0,0,0,0,"""French"""
-110.430879,56.836921,22,"""Canada""","""Sovereign country""","""Canada""","""English""","""French""",,1,1,0,0,0,0,0,"""English_French"""
-69.684882,67.856696,22,"""Canada""","""Sovereign country""","""Canada""","""English""","""French""",,1,1,0,0,0,0,0,"""English_French"""
-76.784377,79.622971,22,"""Canada""","""Sovereign country""","""Canada""","""English""","""French""",,1,1,0,0,0,0,0,"""English_French"""


In [14]:
continents = pl.read_csv('data/Countries_Continents.csv')
continents[0:5]

Continent,Country
str,str
"""Africa""","""Algeria"""
"""Africa""","""Angola"""
"""Africa""","""Benin"""
"""Africa""","""Botswana"""
"""Africa""","""Burkina Faso"""


In [15]:
print(langs.schema)
print(continents.schema)

Schema([('X', Float64), ('Y', Float64), ('OBJECTID', Int64), ('SOVEREIGNT', String), ('TYPE_', String), ('COUNTRY', String), ('FIRST_OFFI', String), ('SECOND_OFF', String), ('THIRD_OFFI', String), ('ENGLISH_FI', Int64), ('FRENCH_FIR', Int64), ('ARABIC_FIR', Int64), ('SPANISH_FI', Int64), ('PORTUGUESE', Int64), ('CHINESE_FI', Int64), ('RUSSIAN_FI', Int64), ('WORLD_LANG', String)])
Schema([('Continent', String), ('Country', String)])


In [18]:
french=langs.filter(((langs['FIRST_OFFI'] == 'French') | (langs['SECOND_OFF'] == 'French')) & 
                    ((langs['TYPE_'] == 'Sovereign country')|(langs['TYPE_'] == 'Country')) &
                    (langs['SOVEREIGNT'] == langs['COUNTRY']))
print(french['COUNTRY'].n_unique())
french['COUNTRY'].unique()

29


COUNTRY
str
"""Canada"""
"""Djibouti"""
"""Cameroon"""
"""Mauritius"""
"""Switzerland"""
"""Guinea"""
"""Chad"""
"""Madagascar"""
"""Haiti"""
"""Benin"""


In [21]:
# 1. List of countries and their continents
countries_with_continents = french.join(
    continents,
    left_on='COUNTRY',
    right_on='Country',
    how='left'
).select([
    'COUNTRY',
    'Continent'
]).unique().sort(pl.col('Continent'))  # Remove duplicates if any

print("Countries and their Continents:")
countries_with_continents

Countries and their Continents:


COUNTRY,Continent
str,str
"""Comoros""","""Africa"""
"""Madagascar""","""Africa"""
"""Central African Republic""","""Africa"""
"""Mauritius""","""Africa"""
"""Chad""","""Africa"""
"""Ivory Coast""","""Africa"""
"""Equatorial Guinea""","""Africa"""
"""Burkina Faso""","""Africa"""
"""Niger""","""Africa"""
"""Djibouti""","""Africa"""


In [23]:
countries_per_continent = french.join(
    continents,
    left_on='COUNTRY',
    right_on='Country',
    how='left'
).group_by('Continent')\
 .agg(
     pl.col('COUNTRY').n_unique().alias('country_count')
 )\
 .sort('country_count', descending=True)

countries_per_continent

Continent,country_count
str,u32
"""Africa""",22
"""Europe""",5
"""North America""",2
