In [12]:
import pandas as pd

# --------------------------
# 1. Load the Excel file and extract the real table
# --------------------------
# The sheet is read without a header so that the header row can be located by
# its "NO." marker in the first column; only the rows below it are kept as data.
file_path = "UNCTAD-ISDS-Navigator-data-set-31December2023.xlsx"

# Load raw sheet with no header (columns are labelled 0, 1, 2, ...)
raw = pd.read_excel(file_path, sheet_name="Source - UNCTAD ISDS Navigator", header=None)

# Detect header row (where first column is "NO.").
# idxmax() on an all-False mask returns the first index label instead of
# failing, which would silently promote an unrelated row to header, so check
# explicitly that the marker was found.
header_mask = raw[0].eq("NO.")
if not header_mask.any():
    raise ValueError(
        'Header row not found: no cell in the first column equals "NO."'
    )
header_row_idx = header_mask.idxmax()
column_names = raw.iloc[header_row_idx].tolist()

# Build cleaned dataframe: rows after the header, labelled with the header
# values and re-indexed from 0.
df = raw.iloc[header_row_idx + 1:].copy()
df.columns = column_names
df = df.reset_index(drop=True)

# --------------------------
# 2. Basic structural exploration
# --------------------------
# Prints a quick overview of the cleaned frame: a preview, the column labels,
# dtypes / non-null counts, the shape, and the per-column missing-value counts.
print("===== FIRST 5 ROWS =====")
print(df.head(), "\n")

print("===== COLUMN NAMES =====")
print(list(df.columns), "\n")

print("===== DATAFRAME INFO =====")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print()

print("===== SHAPE OF DATA (rows, columns) =====")
print(df.shape, "\n")

print("===== MISSING VALUES PER COLUMN =====")
print(df.isna().sum(), "\n")

# --------------------------
# 3. Simple numeric conversion for basic stats
# --------------------------
# Each entry maps a new numeric column to the source column it is derived from.
# Values that cannot be parsed as numbers become NaN (errors="coerce").
numeric_sources = {
    "NO._NUM": "NO.",
    "YEAR_OF_INITIATION_NUM": "YEAR OF INITIATION",
}
for numeric_col, source_col in numeric_sources.items():
    df[numeric_col] = pd.to_numeric(df[source_col], errors="coerce")

# Descriptive statistics for the freshly converted columns only.
numeric_summary = df[list(numeric_sources)].describe()

print("===== SUMMARY STATISTICS FOR BASIC NUMERIC COLUMNS =====")
print(numeric_summary, "\n")

# --------------------------
# 4. Very simple categorical exploration
# --------------------------
# (banner, column) pairs: for each, show the ten most frequent values.
top_value_sections = (
    ("===== TOP 10 RESPONDENT STATES =====", "RESPONDENT STATE"),
    ("===== TOP 10 HOME STATES OF INVESTOR =====", "HOME STATE OF INVESTOR"),
    ("===== TOP 10 ECONOMIC SECTORS =====", "ECONOMIC SECTOR"),
)
for banner, column in top_value_sections:
    top_values = df[column].value_counts().head(10)
    print(banner)
    print(top_values, "\n")

# Number of rows per initiation year, ordered by year rather than by frequency.
cases_per_year = df["YEAR OF INITIATION"].value_counts().sort_index()
print("===== CASES PER YEAR (sorted) =====")
print(cases_per_year, "\n")


===== FIRST 5 ROWS =====
  NO. YEAR OF INITIATION              SHORT CASE NAME  \
0   1               2023    Abertis v. Argentina (II)   
1   2               2023             Access v. Mexico   
2   3               2023               AET v. Germany   
3   4               2023   Amadeus v. North Macedonia   
4   5               2023  Ambiente v. North Macedonia   

                                      FULL CASE NAME  \
0  Abertis Infraestructuras, S.A. v. Argentine Re...   
1  Access Business Group LLC v. United Mexican St...   
2  Azienda Elettrica Ticinese v. Federal Republic...   
3  Amadeus Group and Amadeus Development DOOEL v....   
4  FCL Ambiente S.r.l. v. Republic of North Maced...   

                                      APPLICABLE IIA  \
0                       Argentina - Spain BIT (1991)   
1                        NAFTA (1992);\nUSMCA (2018)   
2                   The Energy Charter Treaty (1994)   
3  Albania - Macedonia, The former Yugoslav Repub...   
4  Italy - Mace

In [13]:
# Re-display the dtype / non-null report and the shape of the cleaned frame.
print("===== DATAFRAME INFO =====")
# DataFrame.info() prints its report directly and returns None; wrapping it in
# print() would append a stray "None" line, so call it on its own.
df.info()
print()

print("===== SHAPE OF DATA (rows, columns) =====")
print(df.shape, "\n")

===== DATAFRAME INFO =====
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1332 entries, 0 to 1331
Data columns (total 30 columns):
 #   Column                                                   Non-Null Count  Dtype 
---  ------                                                   --------------  ----- 
 0   NO.                                                      1332 non-null   object
 1   YEAR OF INITIATION                                       1332 non-null   object
 2   SHORT CASE NAME                                          1332 non-null   object
 3   FULL CASE NAME                                           1332 non-null   object
 4   APPLICABLE IIA                                           1332 non-null   object
 5   ARBITRAL RULES                                           1332 non-null   object
 6   ADMINISTERING INSTITUTION                                1332 non-null   object
 7   STATUS/OUTCOME OF ORIGINAL PROCEEDINGS                   1332 non-null   object
 8   RESPONDENT 