In [21]:
"""
Configuration file for Crime Prediction project
"""
from pathlib import Path

# Project Paths
PROJECT_ROOT = Path.cwd()  # Usa la directory corrente
DATA_DIR = PROJECT_ROOT / "Data"
MODEL_DIR = DATA_DIR / "models"


# Data files
WDI_2021_PATH = DATA_DIR / "wdi_2021.csv"
WDI_2023_PATH = DATA_DIR / "wdi_2023.csv"
OC_2021_PATH = DATA_DIR / "oc_2021.csv"
OC_2023_PATH = DATA_DIR / "oc_2023.csv"

# Model Path
MODEL_PATH = MODEL_DIR / "random_forest_model.pkl"


COUNTRY_NAME_MAPPING_WDI_TO_OC = {
    'Bahamas, The': 'Bahamas',
    'Brunei Darussalam': 'Brunei',
    'Czechia': 'Czech Republic',
    "Cote d'Ivoire": "Côte d'Ivoire",
    'Egypt, Arab Rep.': 'Egypt',
    'eSwatini': 'Eswatini',
    'Gambia, The': 'Gambia',
    'Iran, Islamic Rep.': 'Iran',
    "Korea, Dem. People's Rep.": 'Korea, DPR',
    'Korea, Rep,': 'Korea, Rep.',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'Lao PDR': 'Laos',
    'Micronesia, Fed. Sts.': 'Micronesia (Federated States of)',
    'Russian Federation': 'Russia',
    'Slovak Republic': 'Slovakia',
    'Somalia, Fed. Rep.': 'Somalia',
    'St, Kitts and Nevis': 'St. Kitts and Nevis',
    'St, Lucia': 'St. Lucia',
    'St, Vincent and the Grenadines': 'St. Vincent and the Grenadines',
    'Syrian Arab Republic': 'Syria',
    'Turkiye': 'Turkey',
    'Venezuela, RB': 'Venezuela',
    'Viet Nam': 'Vietnam',
    'Yemen, Rep.': 'Yemen',
    'Congo, Dem, Rep.': 'Congo, Dem. Rep,',
    'Congo, Rep,': 'Congo, Rep.',
}

# These indicators have been selected after the EDA studio on notebook
WDI_INDICATORS = [
        'NY.GDP.PCAP.CD',           # GDP per capita (current US$) X
        'NY.GDP.MKTP.KD.ZG',        # GDP growth (annual %) X
        'SL.UEM.TOTL.ZS',           # Unemployment, total (% of labor force) X
        'SL.UEM.1524.ZS',           # Youth unemployment (ages 15-24, %) X
        'FP.CPI.TOTL.ZG',           # Inflation, consumer prices (annual %) X
        'NE.TRD.GNFS.ZS',           # Trade (% of GDP) X
    

    # Low education and school dropout could push to get involved in crime
    
        'SE.SEC.ENRR',              # School enrollment, secondary (% gross) X
    

    # Weak healtcare systems can create illegal market for drug/organs. Or large young population could indicate more potential recruits
    
        'SP.DYN.LE00.IN',           # Life expectancy at birth (years) X
        'SP.POP.TOTL',              # Population, total X
        'SP.URB.TOTL.IN.ZS',        # Urban population (% of total) X
        'SP.POP.1564.TO.ZS',        # Population ages 15-64 (% of total) - working age X
    

    # These could be the most important, indicating high corruption, weak rule of law or politic instability: each one is a determinant factor in the proliferation of organized crime 
    
        'CC.EST',                   # Control of Corruption (estimate) X
        'GE.EST',                   # Government Effectiveness (estimate) X
        'PV.EST',                   # Political Stability and Absence of Violence (estimate) X
        'RL.EST',                   # Rule of Law (estimate) X
        'RQ.EST',                   # Regulatory Quality (estimate) X
        'VA.EST',                   # Voice and Accountability (estimate) X
    
    ]

COUNTRY_TO_CONTINENT_MAPPING = {'Afghanistan': 'Asia',
    'Libya': 'Africa',
    'Myanmar': 'Asia',
    'Yemen': 'Asia',
    'Central African Republic': 'Africa',
    'Somalia': 'Africa',
    'Korea, DPR': 'Asia',
    'Venezuela': 'Americas',
    'South Sudan': 'Africa',
    'Syria': 'Asia',
    'Nicaragua': 'Americas',
    'Burundi': 'Africa',
    'Turkmenistan': 'Asia',
    'Equatorial Guinea': 'Africa',
    'Eritrea': 'Africa',
    'Congo, Dem. Rep.': 'Africa',
    'Mali': 'Africa',
    'Comoros': 'Africa',
    'Chad': 'Africa',
    'Haiti': 'Americas',
    'Tajikistan': 'Asia',
    'Guinea-Bissau': 'Africa',
    'Sudan': 'Africa',
    'Suriname': 'Americas',
    'Iran': 'Asia',
    'Zimbabwe': 'Africa',
    'Guinea': 'Africa',
    'Mauritania': 'Africa',
    'Cameroon': 'Africa',
    'Iraq': 'Asia',
    'El Salvador': 'Americas',
    'Belarus': 'Europe',
    'Liberia': 'Africa',
    'Gabon': 'Africa',
    'Congo, Rep.': 'Africa',
    'Mozambique': 'Africa',
    'Papua New Guinea': 'Oceania',
    'Belize': 'Americas',
    'Eswatini': 'Africa',
    'Madagascar': 'Africa',
    'Turkey': 'Asia',
    'Paraguay': 'Americas',
    'Sri Lanka': 'Asia',
    'Lebanon': 'Asia',
    'Laos': 'Asia',
    'Burkina Faso': 'Africa',
    'Niger': 'Africa',
    'Benin': 'Africa',
    'Cambodia': 'Asia',
    'Russia': 'Europe',
    'Kyrgyzstan': 'Asia',
    'Egypt': 'Africa',
    'Timor-Leste': 'Asia',
    'Uganda': 'Africa',
    'Bosnia and Herzegovina': 'Europe',
    'Uzbekistan': 'Asia',
    'Moldova': 'Europe',
    'Lesotho': 'Africa',
    'Pakistan': 'Asia',
    'Azerbaijan': 'Asia',
    'Nepal': 'Asia',
    'Saudi Arabia': 'Asia',
    'Guyana': 'Americas',
    'Sierra Leone': 'Africa',
    'Honduras': 'Americas',
    'Guatemala': 'Americas',
    'Maldives': 'Asia',
    'Tanzania': 'Africa',
    'Mexico': 'Americas',
    'Philippines': 'Asia',
    'Indonesia': 'Asia',
    'Kiribati': 'Oceania',
    'Peru': 'Americas',
    'Algeria': 'Africa',
    'Djibouti': 'Africa',
    'Bangladesh': 'Asia',
    'Cyprus': 'Europe',
    'Angola': 'Africa',
    'Togo': 'Africa',
    'Tunisia': 'Africa',
    'Ukraine': 'Europe',
    'Zambia': 'Africa',
    'Kazakhstan': 'Asia',
    'Namibia': 'Africa',
    'Malawi': 'Africa',
    'Antigua and Barbuda': 'Americas',
    'Brunei': 'Asia',
    'Morocco': 'Africa',
    'Panama': 'Americas',
    'Bhutan': 'Asia',
    'Montenegro': 'Europe',
    'Ethiopia': 'Africa',
    'Vietnam': 'Asia',
    'Thailand': 'Asia',
    'Dominican Republic': 'Americas',
    'Bolivia': 'Americas',
    'Ecuador': 'Americas',
    'Hungary': 'Europe',
    'Brazil': 'Americas',
    'Sao Tome and Principe': 'Africa',
    'Serbia': 'Europe',
    'St. Kitts and Nevis': 'Americas',
    'Nauru': 'Oceania',
    'Solomon Islands': 'Oceania',
    'United Arab Emirates': 'Asia',
    "Côte d'Ivoire": 'Africa',
    'Albania': 'Europe',
    'Malta': 'Europe',
    'Grenada': 'Americas',
    'Vanuatu': 'Oceania',
    'Greece': 'Europe',
    'Gambia': 'Africa',
    'Dominica': 'Americas',
    'Oman': 'Asia',
    'Mongolia': 'Asia',
    'Seychelles': 'Africa',
    'San Marino': 'Europe',
    'Georgia': 'Asia',
    'North Macedonia': 'Europe',
    'Slovakia': 'Europe',
    'Tonga': 'Oceania',
    'Kenya': 'Africa',
    'Bulgaria': 'Europe',
    'Trinidad and Tobago': 'Americas',
    'Palau': 'Oceania',
    'Cuba': 'Americas',
    'St. Vincent and the Grenadines': 'Americas',
    'Jamaica': 'Americas',
    'India': 'Asia',
    'Qatar': 'Asia',
    'Bahrain': 'Asia',
    'Fiji': 'Oceania',
    'Ghana': 'Africa',
    'Botswana': 'Africa',
    'Bahamas': 'Americas',
    'Kuwait': 'Asia',
    'Mauritius': 'Africa',
    'Rwanda': 'Africa',
    'Jordan': 'Asia',
    'St. Lucia': 'Americas',
    'Colombia': 'Americas',
    'South Africa': 'Africa',
    'Costa Rica': 'Americas',
    'Monaco': 'Europe',
    'China': 'Asia',
    'Armenia': 'Asia',
    'Nigeria': 'Africa',
    'Senegal': 'Africa',
    'Marshall Islands': 'Oceania',
    'Samoa': 'Oceania',
    'Poland': 'Europe',
    'Micronesia (Federated States of)': 'Oceania',
    'Malaysia': 'Asia',
    'Croatia': 'Europe',
    'Argentina': 'Americas',
    'Romania': 'Europe',
    'Slovenia': 'Europe',
    'Israel': 'Asia',
    'Tuvalu': 'Oceania',
    'Barbados': 'Americas',
    'Chile': 'Americas',
    'Czech Republic': 'Europe',
    'Italy': 'Europe',
    'Portugal': 'Europe',
    'Cabo Verde': 'Africa',
    'Spain': 'Europe',
    'France': 'Europe',
    'Switzerland': 'Europe',
    'Belgium': 'Europe',
    'United States': 'Americas',
    'Canada': 'Americas',
    'Lithuania': 'Europe',
    'Ireland': 'Europe',
    'Japan': 'Asia',
    'Australia': 'Oceania',
    'Netherlands': 'Europe',
    'Sweden': 'Europe',
    'Germany': 'Europe',
    'Austria': 'Europe',
    'Uruguay': 'Americas',
    'Luxembourg': 'Europe',
    'United Kingdom': 'Europe',
    'Latvia': 'Europe',
    'Singapore': 'Asia',
    'Estonia': 'Europe',
    'New Zealand': 'Oceania',
    'Norway': 'Europe',
    'Andorra': 'Europe',
    'Korea, Rep.': 'Asia',
    'Denmark': 'Europe',
    'Iceland': 'Europe',
    'Liechtenstein': 'Europe',
    'Finland': 'Europe'}

MISSING_SUMMARY = ['School enrollment, secondary (% gross)',
 'Trade (% of GDP)',
 'Inflation, consumer prices (annual %)',
 'Unemployment, total (% of total labor force) (modeled ILO estimate)',
 'Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)',
 'GDP per capita (current US$)',
 'GDP growth (annual %)']


# Model Config
RANDOM_STATE = 42
TEST_SIZE =0.2

# Target Column
TARGET_COLUMN = 'criminality_oc'

# Columns to drop
DROP_COLUMNS = ['country', TARGET_COLUMN]

# Random forest Params (obtained through GridSearch)
RF_PARAMS =  {
    'max_depth': 15, 
    'max_features': None, 
    'min_samples_leaf': 1, 
    'min_samples_split': 2, 
    'n_estimators': 300
    }


In [2]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction import DictVectorizer

import numpy as np

In [91]:

def load_data(wdi_path: str, oc_path: str, year_used: int) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load WDI and OC datasets 

    Args:
        wdi_path (str): Path to WDI CSV File
        oc_path (str): Path to OC CSV file

    Returns:
        pd.DataFrame: Merged Dataframe
    """
    
    df_wdi = pd.read_csv(wdi_path)
    df_oc = pd.read_csv(oc_path)
    
    return df_wdi, df_oc


def align_wdi_countries_names(df_wdi: pd.DataFrame, oc_countries_set: set) -> pd.DataFrame:  
    """
    Aligns countries names in wdi Dataset with OC countries

    Args:
        df_wdi (pd.DataFrame): wdi dataframe from csv file (2021 or 2023)
        oc_countries_set (set): set generated from df_oc['Country] column

    Returns:
        pd.DataFrame: df_wdi with correct countries' names
    """
    df_wdi['Country_standardized'] = df_wdi['Country Name'].replace(COUNTRY_NAME_MAPPING_WDI_TO_OC)
    df_wdi_correct_countries = df_wdi[df_wdi['Country_standardized'].isin(oc_countries_set)].copy()
    return df_wdi_correct_countries


def select_wdi_indicators(df_wdi_correct_countries: pd.DataFrame,year_used, indicator_codes=WDI_INDICATORS ) -> pd.DataFrame:
    """ 
    Function to filter wdi dataset indexes, remove useles columns and pivot the df
    """
    df_wdi_filtered = df_wdi_correct_countries[df_wdi_correct_countries['Indicator Code'].isin(indicator_codes)].copy()
    
    found = set(df_wdi_filtered['Indicator Code'].unique())
    missing = set(indicator_codes) - found
    if missing:
        print(f"\n Indicator not found in WDI:")
        for code in missing:
            print(f"    {code}")
    
    # after columns have been selected, remove useless columns
    del df_wdi_filtered['Country Name']
    del df_wdi_filtered['Country Code']
    
    # put country name on first place
    col = "Country_standardized"
    cols = [col] + [c for c in df_wdi_filtered.columns if c != col]
    df_wdi_filtered = df_wdi_filtered[cols]
    
    df_wdi_filtered = df_wdi_filtered.pivot(index = 'Country_standardized', columns = 'Indicator Name', values = f'{str(year_used)}')
    return df_wdi_filtered


def add_continents_to_wdi(df_wdi_filtered: pd.DataFrame, country_to_continent: dict = COUNTRY_TO_CONTINENT_MAPPING ):
    df_wdi_filtered['continent'] = df_wdi_filtered.index.map(country_to_continent) 
    col = 'continent'
    df_wdi_filtered= df_wdi_filtered[[col] + [c for c in df_wdi_filtered.columns if c != col]] 
    df_wdi_with_continents = df_wdi_filtered.copy()
    return df_wdi_with_continents

def handling_wdi_missing_values(df_wdi_with_continents: pd.DataFrame, missing_indexes: list= MISSING_SUMMARY):
    if df_wdi_with_continents.isnull().sum().sum() >0:
        for col in missing_indexes:
            # Fill null for each continent
            for continent in df_wdi_with_continents['continent'].unique():
                print(df_wdi_with_continents['continent'].unique())
                if pd.isnull(continent):
                    continue
                # select countries in the continent
                continent_mask = df_wdi_with_continents['continent'] == continent
                # count missing values in this continent
                missing_in_continent = df_wdi_with_continents.loc[continent_mask, col].isnull().sum()
                
                if missing_in_continent > 0:
                    # compute continent mean
                    continent_mean = df_wdi_with_continents.loc[continent_mask, col].mean()
                    
                    # If all the countries of a continent are missing, use global mean
                    if pd.isnull(continent_mean):
                        continent_mean = df_wdi_with_continents[col].mean()
                        print(f"    {continent:20s}: {missing_in_continent:2d} missing → Global mean = {continent_mean:.2f}")
                    else:
                        print(f"    {continent:20s}: {missing_in_continent:2d} missing → Continent mean = {continent_mean:.2f}")
                    # fillna in the df
                    df_wdi_with_continents.loc[continent_mask, col] = df_wdi_with_continents.loc[continent_mask, col].fillna(continent_mean)
            print(df_wdi_with_continents)
            # verify no more missing values
            remaining = df_wdi_with_continents[col].isnull().sum()            
            if remaining > 0:
                print(f"  !!{remaining} missing values → Use global mean ")
                global_mean = df_wdi_with_continents[col].mean()
                df_wdi_with_continents[col].fillna(global_mean, inplace=True)        
                
            # final verify
            # total_missing_after = df_wdi_with_continents[missing_indexes.index].isnull().sum().sum()
            # total_missing_after = df_wdi_with_continents.isnull().sum().sum()
            # assert total_missing_after == 0, f"STILL {total_missing_after} MISSING!!"
            print(f"0 missing values")
            return df_wdi_with_continents
    else:
        print(f"Sustitution completed: 0 missing values")
        df_wdi_no_null_values = df_wdi_with_continents.copy()
        return df_wdi_no_null_values
            
def rename_columns(df: pd.DataFrame, suffix: str, remove_punctuation: bool=True, no_white_spaces: bool = True, keep_4_words: bool = True, add_suffix: bool = True ) -> pd.DataFrame:
    import re
    columns = []
    for col in df.columns:
        # Remove punctuation
        if remove_punctuation:
            col = re.sub(r"[^\w\s]", "", col)
        
        if no_white_spaces: 
            # substitue white spaces with underscores
            col = re.sub(r"\s+", "_", col.strip())
        if keep_4_words:
            # keep only the first 4 words
            parts = col.split("_")[:4]
            col = "_".join(parts)
        if add_suffix:
            # add '_wdi' suffix
            col = f"{col}_{suffix}"
            
        # final clean
        col = re.sub(r"_+", "_", col).strip("_").lower()
        columns.append(col)
    df.columns = columns
    return df


# df_oc.columns = [modify_name_cols(c, suffix='_oc', keep_4_words=False, ) for c in df_oc.columns]
# df_wdi_clean.columns = [modify_name_cols(c, suffix='_wdi' ) for c in df_wdi_clean.columns]
# del df_oc['index_oc']

def merge_datasets(df_wdi_clean:  pd.DataFrame, df_oc: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the datasets and remove OC columns except target

    Args:
        df_wdi_clean (pd.DataFrame): _description_
        df_oc (pd.DataFrame): _description_

    Returns:
        pd.DataFrame: _description_
    """
    df_merged =  df_wdi_clean.merge(df_oc, left_on='country_standardized_wdi', right_on = 'country_oc', how='left', validate='one_to_one')
    
    # remove not necessary columns
    del df_merged['country_oc']
    del df_merged['continent_oc']
    del df_merged['region_oc']
    
    # rename column
    df_merged = df_merged.rename(columns={'country_standardized_wdi': 'country', 'continent_wdi':'continent'})
    
    # drop OC columns except target, we work with WDI data
    oc_cols_to_drop = [col for col in df_merged.columns if col.endswith('_oc') and col != TARGET_COLUMN]
    df_merged = df_merged.drop(columns=oc_cols_to_drop)
    return df_merged


def get_scaler(X_train: pd.DataFrame) -> StandardScaler:
    """
    Fit and return StandardScaler on training data

    Args:
        X_train (pd.DataFrame): Training features

    Returns:
        StandardScaler: Fitted Scaler
    """
    scaler = StandardScaler()
    scaler.fit(X_train)
    return scaler


def scale_features(X: pd.DataFrame, scaler: StandardScaler, ) -> np.ndarray:
    """
    Scale Features using fittest scaler 

    Args:
        X (pd.DataFrame): features to scale
        scaler (StandardScaler): fitted std scaler

    Returns:
        np.ndarray: Scaled features
    """
    return scaler.transform(X)


def get_vectorizer(X_train: pd.DataFrame) -> DictVectorizer:
    dv = DictVectorizer(sparse=False)
    train_dict = X_train.to_dict(orient='records')
    dv.fit(train_dict)
    return dv


def vectorize_df(X: pd.DataFrame, dv: DictVectorizer):
    x_dict = X.to_dict(orient='records')
    X_vectorized = dv.transform(x_dict)
    return X_vectorized



In [92]:
df_wdi, df_oc = load_data(wdi_path=WDI_2021_PATH, oc_path=OC_2021_PATH, year_used=2021)

In [93]:
df_wdi = align_wdi_countries_names(df_wdi,oc_countries_set=set(df_oc['Country'].tolist()))


In [94]:
df_wdi = select_wdi_indicators(df_wdi_correct_countries=df_wdi, year_used=2021)


In [95]:
df_wdi = add_continents_to_wdi(df_wdi_filtered=df_wdi, )


In [96]:
df_wdi = handling_wdi_missing_values(df_wdi_with_continents=df_wdi, )

['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
    Asia                : 11 missing → Continent mean = 87.30
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
    Europe              :  1 missing → Continent mean = 106.50
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
    Africa              : 29 missing → Continent mean = 62.92
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
    Americas            :  8 missing → Continent mean = 94.33
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
    Oceania             :  4 missing → Continent mean = 93.50
Indicator Name       continent  Control of Corruption: Estimate  \
Country_standardized                                              
Afghanistan               Asia                        -1.152266   
Albania                 Europe                        -0.577180   
Algeria                 Africa                        -0.635961   
Andorra                 Europe                         1.279687   
Angola                  Africa                 

In [98]:
df_wdi = rename_columns(df_wdi, suffix='_wdi')
df_wdi

Unnamed: 0_level_0,continent_wdi,control_of_corruption_estimate_wdi,gdp_growth_annual_wdi,gdp_per_capita_current_wdi,government_effectiveness_estimate_wdi,inflation_consumer_prices_annual_wdi,life_expectancy_at_birth_wdi,political_stability_and_absence_wdi,population_ages_1564_of_wdi,population_total_wdi,regulatory_quality_estimate_wdi,rule_of_law_estimate_wdi,school_enrollment_secondary_gross_wdi,trade_of_gdp_wdi,unemployment_total_of_total_wdi,unemployment_youth_total_of_wdi,urban_population_of_total_wdi,voice_and_accountability_estimate_wdi
Country_standardized,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,Asia,-1.152266,-20.738839,356.496214,-1.670588,5.133203,60.417,-2.518676,53.739325,40000412.0,-1.311682,-1.877234,87.299560,51.411716,11.994,16.395,26.314,-1.568389
Albania,Europe,-0.577180,8.969576,6413.282808,-0.035985,2.041472,76.844,0.196372,67.366563,2811666.0,0.175859,-0.280716,97.665033,75.590322,11.474,27.244,62.969,0.085589
Algeria,Africa,-0.635961,3.800000,4160.559267,-0.652746,7.226063,75.208,-0.992457,63.069061,44761099.0,-1.181963,-0.833023,62.915220,46.837059,13.630,33.843,74.261,-1.013779
Andorra,Europe,1.279687,8.286779,42425.699676,1.748928,,82.331,1.581259,72.361495,78364.0,1.359350,1.642051,98.040421,,,,87.858,1.032500
Angola,Africa,-0.649476,1.199211,1925.874661,-1.126360,25.754295,62.958,-0.710481,52.346046,34532429.0,-0.670015,-0.983754,53.135154,74.464499,15.799,30.171,67.460,-0.859024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,Americas,-1.622940,,,-1.874598,,71.536,-1.454970,63.991769,28237826.0,-2.215186,-2.300895,94.333887,,7.037,12.983,88.325,-1.617731
Vietnam,Asia,-0.308163,2.553729,3704.193559,0.243436,1.834716,74.145,-0.117782,67.916650,98935098.0,-0.404661,-0.172222,92.769021,186.675833,2.385,6.907,38.052,-1.308746
Yemen,Asia,-1.668677,,,-2.325698,,66.019,-2.580460,56.126745,37140230.0,-2.040683,-1.804350,87.299560,,18.287,34.130,38.546,-1.680721
Zambia,Africa,-0.757705,6.234922,1127.160779,-0.840983,22.020768,62.363,0.052414,55.279825,19603607.0,-0.570348,-0.606376,62.915220,86.208511,5.199,9.367,45.192,-0.372968


In [100]:

df_oc

Unnamed: 0.1,Unnamed: 0,Continent,Region,Country,Criminality,Criminal markets,Human trafficking,Human smuggling,Arms trafficking,Flora crimes,...,International cooperation,National policies and laws,Judicial system and detention,Law enforcement,Territorial integrity,Anti-money laundering,Economic regulatory capacity,Victim and witness support,Prevention,Non-state actors
0,0,Asia,Western Asia,Turkey,6.89,6.40,7.0,9.0,9.0,4.0,...,3.5,4.5,2.0,3.0,6.5,2.0,4.0,4.0,3.5,3.5
1,1,Africa,West Africa,Cabo Verde,4.04,3.70,4.5,2.0,2.5,2.5,...,8.5,7.0,7.0,6.0,7.5,5.0,6.0,3.5,6.0,7.0
2,2,Africa,Southern Africa,South Africa,6.63,6.00,4.5,4.0,8.0,3.5,...,6.5,6.5,6.0,5.0,6.0,6.0,6.0,4.0,5.0,7.0
3,3,Americas,South America,Colombia,7.66,7.20,7.5,7.0,8.0,6.0,...,9.0,7.0,5.5,6.0,4.5,6.0,5.0,3.5,4.5,6.5
4,4,Americas,South America,Peru,6.35,6.20,7.0,6.0,5.5,7.0,...,5.0,6.0,6.0,5.0,5.0,4.5,4.0,3.5,3.0,4.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,188,Americas,Caribbean,Jamaica,5.91,4.20,5.0,3.0,8.5,1.5,...,6.5,5.5,4.0,6.0,6.5,4.5,4.5,5.0,6.0,6.5
189,189,Americas,Caribbean,St. Vincent and the Grenadines,3.30,2.85,2.5,1.0,4.0,1.0,...,6.0,6.5,6.0,4.5,6.0,5.5,4.0,3.0,6.0,6.0
190,190,Europe,Central & Eastern Europe,Slovenia,4.29,3.95,4.0,5.5,3.5,2.0,...,6.5,6.5,6.0,6.0,7.5,6.5,5.0,6.0,6.0,6.0
191,191,Americas,Caribbean,St. Lucia,4.09,3.05,4.0,1.5,5.0,2.0,...,6.5,6.5,6.0,4.5,5.5,4.0,5.5,5.0,6.0,4.0


In [101]:
MODEL_DIR

PosixPath('/home/paccale/Documenti/GitHub/global_crime_indicators/Data/models')