# Task 002: OOPS + Pandas

In [31]:
import pandas as pd

In [32]:
class DataLoader:
    """Load a city-level population CSV in chunks and print summaries of it.

    The methods below read the columns ``City``, ``State``, ``Population``,
    ``Metropolitan``, ``Sexratio`` and ``Literacy``; the file is parsed with
    ``,`` as the thousands separator.

    Attributes:
        file_path: Path of the CSV file given to the constructor.
        chunksize: Number of rows requested per chunk.
        iterator: Chunked pandas reader over ``file_path`` (single pass).
        data_chunks: List of the chunks collected so far by ``load_data``.
        total_batches: Number of chunks collected so far.
    """

    def __init__(self, file_path, chunksize):
        """Open a chunked reader over ``file_path``.

        Args:
            file_path: Location of the CSV file.
            chunksize: Rows per chunk, forwarded to ``pd.read_csv``.
        """
        self.file_path = file_path
        self.chunksize = chunksize
        # One-shot reader: it is consumed by the first ``load_data`` call.
        self.iterator = pd.read_csv(file_path, chunksize=self.chunksize, thousands=',')
        self.data_chunks = []
        self.total_batches = 0

    def load_data(self):
        """Read every remaining chunk and return all loaded rows as one frame.

        Calling this again after the reader is exhausted reads nothing new and
        simply returns the rows that were already collected.

        Returns:
            pandas.DataFrame: The raw (not yet type-converted) rows of every
            chunk collected so far, with a fresh 0..n-1 index.

        Raises:
            ValueError: If no chunk could be read at all.
        """
        print(f"Reading the data in chunks of size {self.chunksize}...")

        for chunk in self.iterator:
            self.data_chunks.append(chunk)
            self.total_batches += 1

        # Fail with a clear message instead of reporting success and then
        # hitting pandas' "No objects to concatenate" error.
        if not self.data_chunks:
            raise ValueError(f"No data could be read from {self.file_path}")

        print(f"Total batches loaded: {self.total_batches}")
        print("All data loaded successfully!")
        return pd.concat(self.data_chunks, ignore_index=True)

    @staticmethod
    def _to_int_column(series):
        """Convert a column to integers without failing on missing values."""
        numeric = pd.to_numeric(series, errors='coerce')
        if not numeric.isna().any():
            # Clean data keeps the plain integer dtype used previously.
            return numeric.astype(int)
        # NumPy integers cannot hold NaN, so convert the present values and
        # re-align on the original index using the nullable ``Int64`` dtype;
        # missing / unparseable cells become <NA>.
        present = numeric.dropna().astype(int)
        return present.astype('Int64').reindex(numeric.index)

    @staticmethod
    def _format_count(value):
        """Format an integer count with thousands separators, or ``N/A`` if missing."""
        return "N/A" if pd.isna(value) else f"{int(value):,}"

    def get_dataframe(self):
        """Return all loaded chunks as a single type-converted DataFrame.

        ``Population`` and ``Metropolitan`` become integers (nullable ``Int64``
        when a value is missing or unparseable); ``Sexratio`` and ``Literacy``
        become numeric with unparseable values coerced to NaN.

        Returns:
            pandas.DataFrame: A newly built frame on every call.

        Raises:
            ValueError: If ``load_data`` has not collected any chunk yet.
        """
        if not self.data_chunks:
            raise ValueError("No data loaded yet.")

        df = pd.concat(self.data_chunks, ignore_index=True)
        for column in ('Population', 'Metropolitan'):
            df[column] = self._to_int_column(df[column])
        for column in ('Sexratio', 'Literacy'):
            df[column] = pd.to_numeric(df[column], errors='coerce')

        return df

    def get_state_details(self):
        """Aggregate the loaded data per state.

        Returns:
            pandas.DataFrame: One row per ``State`` with the sum, mean, min and
            max of ``Population`` plus the mean ``Sexratio`` and ``Literacy``,
            rounded to two decimals and sorted by total population, largest
            first.
        """
        data = self.get_dataframe()
        summary = data.groupby('State').agg({
            "Population": ["sum", "mean", "min", "max"],
            "Sexratio": "mean",
            "Literacy": "mean"
        }).round(2).sort_values(("Population", "sum"), ascending=False)
        return summary

    def state_summary(self, state_name=None):
        """Print the totals and averages for one state.

        Args:
            state_name: State to report on, matched case-insensitively. When
                omitted, the name is requested interactively via ``input``.
        """
        df = self.get_dataframe()
        if state_name is None:
            state_name = input("Enter the state name: ")
        state_name = state_name.strip()
        state_data = df[df['State'].str.lower() == state_name.lower()]

        if state_data.empty:
            print(f"No data found for state: {state_name}")
            return

        # Skip missing city names so ``join`` only ever sees strings.
        cities = state_data['City'].dropna().astype(str).unique()

        # Use the spelling stored in the data rather than title-casing the
        # user's input (which would turn "and" into "And", for example).
        print(f"\nSummary for State: {state_data['State'].iloc[0]}")
        print(f"    Cities: {', '.join(cities)}")
        print(f"    Total Population: {state_data['Population'].sum():,}")
        print(f"    Total Metropolitan Population: {state_data['Metropolitan'].sum():,}")
        print(f"    Average Sex Ratio: {state_data['Sexratio'].mean():.2f}")
        print(f"    Average Literacy: {state_data['Literacy'].mean():.2f}%")

    def city_summary(self, city_name=None):
        """Print the details of every city that matches a name.

        Args:
            city_name: City to report on, matched case-insensitively. When
                omitted, the name is requested interactively via ``input``.
        """
        df = self.get_dataframe()
        if city_name is None:
            city_name = input("Enter the city name: ")
        city_name = city_name.strip()
        city_data = df[df['City'].str.lower() == city_name.lower()]

        if city_data.empty:
            print(f"No data found for city: {city_name}")
            return

        # The same city name can occur in more than one state, so report
        # every matching row instead of only the first one.
        for position in range(len(city_data)):
            row = city_data.iloc[position]
            print(f"\nSummary for City: {row['City']}")
            print(f"    State: {row['State']}")
            print(f"    Population: {self._format_count(row['Population'])}")
            print(f"    Metropolitan: {self._format_count(row['Metropolitan'])}")
            print(f"    Sex Ratio: {row['Sexratio']}")
            print(f"    Literacy: {row['Literacy']}%")

In [33]:
if __name__ == "__main__":
    # Build the loader for the population CSV (path is relative to the working
    # directory) and read every chunk so later cells can query it.
    file = "data/POPS.csv"
    loader = DataLoader(file, chunksize=10)
    loader.load_data()


Reading the data in chunks of size 10...
Total batches loaded: 50
All data loaded successfully!


In [34]:
# Show the combined, type-converted DataFrame built from all loaded chunks
# (the rendered output below is truncated in the middle).
loader.get_dataframe()

Unnamed: 0,#,City,State,Population,Metropolitan,Sexratio,Literacy
0,1,Mumbai,Maharashtra,12442373,18414288,853,89.73
1,2,Delhi,Delhi,11034555,16314838,876,87.59
2,3,Bangalore,Karnataka,8443675,8499399,923,88.71
3,4,Hyderabad,Andhra Pradesh,6731790,7749334,955,83.26
4,5,Ahmedabad,Gujarat,5577940,6352254,898,88.29
...,...,...,...,...,...,...,...
493,494,Gangtok,Sikkim,100286,100286,912,89.33
494,495,Datia,Madhya Pradesh,100284,100284,900,80.56
495,496,Nagda,Madhya Pradesh,100039,100039,947,80.71
496,497,Banswara,Rajasthan,99969,101177,960,85.72


In [35]:
# Per-state aggregates (population sum/mean/min/max, mean sex ratio and
# literacy), sorted by total population in descending order.
print("\n--- State-wise Details ---")
loader.get_state_details()


--- State-wise Details ---


Unnamed: 0_level_0,Population,Population,Population,Population,Sexratio,Literacy
Unnamed: 0_level_1,sum,mean,min,max,mean,mean
State,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Maharashtra,38959004,885431.91,101805,12442373,926.02,88.68
Uttar Pradesh,26966642,421353.78,101277,2817105,896.45,77.91
Andhra Pradesh,18064536,430108.0,101398,6731790,999.4,80.47
West Bengal,17990884,294932.52,100575,4496694,949.75,87.21
Gujarat,17964399,598813.3,105573,5577940,910.07,86.13
Karnataka,15827470,608748.85,105529,8443675,991.15,86.03
Tamil Nadu,13823172,431974.12,102905,4646732,1003.25,89.96
Delhi,13510933,900728.87,110351,11034555,863.4,83.5
Madhya Pradesh,11046815,345212.97,100039,1964086,916.62,84.23
Rajasthan,10412360,359046.9,99969,3046163,912.62,79.63


In [36]:
# Interactive: prompts for a state name via input() and prints its summary.
loader.state_summary()


Summary for State: Delhi
    Cities: Delhi, Kirari Suleman Nagar, NDMC, Karawal Nagar, Nangloi Jat, Bhalswa Jahangir Pur, Sultan Pur Majra, Hastsal, Deoli, Dallo Pura, Burari, Mustafabad, Gokal Pur, Mandoli, Delhi Cantonment
    Total Population: 13,510,933
    Total Metropolitan Population: 18,791,216
    Average Sex Ratio: 863.40
    Average Literacy: 83.50%


In [37]:
# Interactive: prompts for a city name via input() and prints its details.
loader.city_summary()


Summary for City: Deoli
    State: Delhi
    Population: 169,122
    Metropolitan: 169,122
    Sex Ratio: 860
    Literacy: 84.28%
