In [9]:
import pandas as pd
import os

# INPUT CSV
df = pd.read_csv("State_UT_wise_total_and_surfaced_length_of_roads_in_India_during_2009-2011.csv")

# Normalize
df = df.rename(columns=lambda c: c.strip())
state_col = "States/UTs"
flag_col = "Total/ Surfaced"

# Output folders
os.makedirs("state_time_series_total_surface", exist_ok=True)
os.makedirs("state_time_series_ratio", exist_ok=True)

YEARS = ["2009", "2010", "2011"]

states = df[state_col].unique()
records = []

for st in states:
    sub = df[df[state_col] == st]

    # Total row
    total_row = sub[sub[flag_col].str.upper().str.contains("T")]
    if total_row.empty:
        total_row = sub.iloc[[0]]

    # Surfaced row
    surf_row = sub[sub[flag_col].str.upper().str.contains("S")]
    if surf_row.empty:
        surf_row = sub.iloc[[1]]

    total_vals = [float(total_row[y]) for y in YEARS]
    surf_vals = [float(surf_row[y]) for y in YEARS]

    # -------- (1) state CSV total/surfaced --------
    ts_df = pd.DataFrame({
        "Type": ["Total", "Surfaced"],
        "2009": [total_vals[0], surf_vals[0]],
        "2010": [total_vals[1], surf_vals[1]],
        "2011": [total_vals[2], surf_vals[2]],
    })
    ts_df.to_csv(f"state_time_series_total_surface/{st}.csv", index=False)

    # -------- (2) state CSV ratio --------
    ratios = [surf_vals[i] / total_vals[i] if total_vals[i] != 0 else None for i in range(3)]
    
    ratio_df = pd.DataFrame({
        "Ratio = Surfaced/Total": ["Value"],
        "2009": [ratios[0]],
        "2010": [ratios[1]],
        "2011": [ratios[2]],
    })
    ratio_df.to_csv(f"state_time_series_ratio/{st}.csv", index=False)

    # Save for ranking
    avg_ratio = sum([r for r in ratios if r is not None]) / len(ratios)
    records.append({
        "State": st,
        "2009": ratios[0],
        "2010": ratios[1],
        "2011": ratios[2],
        "AvgRatio": avg_ratio
    })

# Convert to DataFrame
rank_df = pd.DataFrame(records).dropna(subset=["AvgRatio"])

# -------- (3) BEST & WORST by AVG ratio --------

# Top 10 best
top10 = rank_df.sort_values("AvgRatio", ascending=False).head(10)[
    ["State", "2009", "2010", "2011"]
]

# Top 10 worst
bottom10 = rank_df.sort_values("AvgRatio", ascending=True).head(10)[
    ["State", "2009", "2010", "2011"]
]

# Save outputs (NO avg in final CSV)
top10.to_csv("top10_ratio_states.csv", index=False)
bottom10.to_csv("bottom10_ratio_states.csv", index=False)

print("UPDATED BEST/WORST BASED ON AVERAGE RATIO SUCCESSFULLY GENERATED")


UPDATED BEST/WORST BASED ON AVERAGE RATIO SUCCESSFULLY GENERATED


  total_vals = [float(total_row[y]) for y in YEARS]
  surf_vals = [float(surf_row[y]) for y in YEARS]
