# Explore Mexico Issuers


Number of issuers in Mexico: 269  
Data relevance  
    -avg  
    -segment / grouping  
  
ESG Scoring & Sustainability Rating  
    -avg  
    -segment / grouping  
    -Top & Bottom Outliers  


In [2]:
import pandas as pd
import numpy as np

In [4]:
# Location of the issuer-level equities feed CSV.
# NOTE(review): absolute, machine-specific path -- this cell only runs where the file exists.
path = r"C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\ficheros_tratados\2025\20250201_Equities_feed_IssuerLevel_sinOVR.csv"

# Read the whole feed into memory using pandas' default CSV parsing.
df = pd.read_csv(filepath_or_buffer=path)

In [12]:
# Lift pandas' display truncation: show every column and up to 500 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 500

In [14]:
# The first positional parameter of DataFrame.info is `verbose`, so the former
# `df.info(20)` only requested the full per-column listing (20 is truthy) and
# did not limit anything. Spell that intent out explicitly.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69264 entries, 0 to 69263
Data columns (total 276 columns):
 #    Column                                              Dtype  
---   ------                                              -----  
 0    isin                                                object 
 1    instrument_type                                     object 
 2    issuer_name                                         object 
 3    issuer_country                                      object 
 4    gics2_industry                                      object 
 5    region                                              object 
 6    company_inheriting                                  bool   
 7    parent_company                                      object 
 8    esg_score                                           int64  
 9    esg_score_relevance                                 float64
 10   e_score                                             float64
 11   s_score                   

In [17]:
# Keep only the rows whose issuer_country is 'Mexico'; .copy() makes the
# subset an independent frame rather than a slice of the full feed.
is_mexico = df["issuer_country"] == "Mexico"
df = df.loc[is_mexico].copy()

In [18]:
# Number of rows left after the country filter.
len(df)

269

In [27]:
target_cols = ["esg_score", "esg_score_relevance", "esg_rating", "sustainability_rating"]

# Per-column summaries keyed by column name: a describe() Series for numeric
# columns, a dict of frequency statistics for every other column.
summary_data = {}

for col in target_cols:
    series = df[col]

    # Dtype-agnostic numeric test: also matches int32/float32 and the nullable
    # Int64/Float64 dtypes, which a comparison against ['int64', 'float64']
    # would miss. Booleans stay on the categorical path, as before.
    is_numeric = (
        pd.api.types.is_numeric_dtype(series)
        and not pd.api.types.is_bool_dtype(series)
    )
    if is_numeric:
        summary_data[col] = series.describe().round(2)
        continue

    # value_counts() drops NaN and sorts by descending frequency; compute it
    # once and reuse it for both the top-3 table and the top frequency.
    counts = series.value_counts()
    modes = series.mode()
    summary_data[col] = {
        "Unique Count": series.nunique(),
        # An all-NaN column has no mode; report None instead of raising.
        "Most Common Value": modes.iloc[0] if not modes.empty else None,
        "Top 3 Categories": counts.head(3).to_dict(),
        "Most Common Freq": counts.iloc[0] if not counts.empty else None,
    }

# One column per target; rows are the union of all statistic names.
summary_df = pd.DataFrame(summary_data)

In [28]:
# Render the combined summary table (one column per target column).
summary_df

Unnamed: 0,esg_score,esg_score_relevance,esg_rating,sustainability_rating
25%,33.0,63.96,,
50%,54.0,84.11,,
75%,67.0,94.29,,
Most Common Freq,,,61,66
Most Common Value,,,A,Good
Top 3 Categories,,,"{'A': 61, 'A-': 53, 'C': 45}","{'Good': 66, 'Sufficient': 55, 'Average': 47}"
Unique Count,,,7,7
count,269.0,269.0,,
max,84.0,100.0,,
mean,50.66,74.3,,


In [38]:
# Columns to profile, split by kind.
target_numeric_cols = ["esg_score", "esg_score_relevance"]
target_categorical_cols = ["esg_rating", "sustainability_rating"]

# Ordinal rank for each rating label. Labels are listed best-first, so the
# first label receives the highest rank (7) and the last one receives 1.
esg_rating_order = dict(zip(
    ['A+', 'A', 'A-', 'B', 'C+', 'C', 'C-'],
    range(7, 0, -1),
))
sustainability_rating_order = dict(zip(
    ['Outstanding', 'Leader', 'Good', 'Sufficient', 'Average', 'Limited', 'Poor'],
    range(7, 0, -1),
))

# -------------------- NUMERIC SUMMARY --------------------
def _profile_numeric(values):
    """Return describe() statistics (rounded to 2 decimals) followed by the mode and its frequency."""
    stats = values.describe().round(2)

    if values.dropna().empty:
        # Only missing values: there is no mode to report.
        most_common, most_common_freq = None, None
    else:
        most_common = values.mode()[0]
        most_common_freq = values.value_counts().iloc[0]

    extras = pd.Series({
        "Most Common Value": most_common,
        "Most Common Freq": most_common_freq
    })
    # Stack the two Series so the mode rows follow the describe() rows.
    return pd.concat([stats, extras])


numeric_summary = {name: _profile_numeric(df[name]) for name in target_numeric_cols}

numeric_summary_df = pd.DataFrame(numeric_summary)

# -------------------- CATEGORICAL SUMMARY --------------------
def _profile_categorical(values):
    """Return frequency statistics computed over the non-null entries of a label column."""
    observed = values.dropna()

    if observed.empty:
        # No usable labels: fall back to neutral placeholders.
        return {
            "Unique Count": 0,
            "Most Common Value": None,
            "Most Common Freq": None,
            "Top 3 Categories": {}
        }

    return {
        "Unique Count": observed.nunique(),
        "Most Common Value": observed.mode()[0],
        "Most Common Freq": observed.value_counts().iloc[0],
        "Top 3 Categories": observed.value_counts().head(3).to_dict()
    }


categorical_summary = {name: _profile_categorical(df[name]) for name in target_categorical_cols}

categorical_summary_df = pd.DataFrame(categorical_summary)

# -------------------- CATEGORICAL PERCENTILES --------------------
# Rank lookup per rating column; drives both the label -> rank encoding and
# the rank -> label decoding below.
rating_orders = {
    "esg_rating": esg_rating_order,
    "sustainability_rating": sustainability_rating_order,
}

categorical_data = df[target_categorical_cols].copy()

# Encode labels as ordinal ranks. Missing or unmapped labels become NaN, which
# quantile() skips. No .dropna() here: assigning the result back to the column
# realigns on the index and would restore the NaN rows anyway.
for rating_col, order in rating_orders.items():
    categorical_data[rating_col] = categorical_data[rating_col].map(order)

# Compute percentiles if data is not empty
if not categorical_data.empty:
    # interpolation="lower" always returns a rank that actually occurs in the
    # data. The default linear interpolation can yield fractional ranks
    # (e.g. 4.5) that have no label and would decode to NaN.
    categorical_percentiles = categorical_data.quantile(
        [0.25, 0.50, 0.75], interpolation="lower"
    )

    # Decode the percentile ranks back to their rating labels.
    for rating_col, order in rating_orders.items():
        rank_to_label = {rank: label for label, rank in order.items()}
        categorical_percentiles[rating_col] = categorical_percentiles[rating_col].map(rank_to_label)
else:
    categorical_percentiles = pd.DataFrame(columns=target_categorical_cols)

# -------------------- MERGE CATEGORICAL SUMMARY + PERCENTILES --------------------
# Frequency statistics first, then the 0.25 / 0.5 / 0.75 percentile rows.
merged_categorical_df = pd.concat([categorical_summary_df, categorical_percentiles])



In [40]:
# Render the categorical summary rows followed by the percentile rows.
merged_categorical_df

Unnamed: 0,esg_rating,sustainability_rating
Unique Count,7,7
Most Common Value,A,Good
Most Common Freq,61,66
Top 3 Categories,"{'A': 61, 'A-': 53, 'C': 45}","{'Good': 66, 'Sufficient': 55, 'Average': 47}"
0.25,C,Average
0.5,B,Sufficient
0.75,A,Good


In [39]:
# Render the numeric summary (describe() statistics plus mode and its frequency).
numeric_summary_df

Unnamed: 0,esg_score,esg_score_relevance
count,269.0,269.0
mean,50.66,74.3
std,19.91,24.54
min,11.0,22.48
25%,33.0,63.96
50%,54.0,84.11
75%,67.0,94.29
max,84.0,100.0
Most Common Value,69.0,94.68
Most Common Freq,16.0,7.0
