# Training a model for catchment classification. 

## Imports

In [1]:
import swmmio
import pyswmm
import pandas as pd
import numpy as np

desired_width = 500
pd.set_option("display.width", desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option("display.max_columns", 30)

## Get files

In [3]:
INP_FILE = "dataset/subcatchment_dataset.inp"
RPT_FILE = "dataset/subcatchment_dataset.inp"

## Run simulation

In [15]:
with pyswmm.Simulation(FILE) as sim:
    for step in sim:
        pass
    

# Read inp and rpt file as swmmio model object

In [4]:
model = swmmio.Model(INP_FILE)

## Get subcatchments data from the model

In [24]:
raw_subcatchments = model.subcatchments.dataframe
subcatchments = raw_subcatchments.copy()
subcatchments[:5]

Unnamed: 0_level_0,Raingage,Outlet,Area,PercImperv,Width,PercSlope,CurbLength,N-Imperv,N-Perv,S-Imperv,S-Perv,PctZero,RouteTo,TotalPrecip,TotalRunon,TotalEvap,TotalInfil,ImpervRunoff,PervRunoff,TotalRunoffIn,TotalRunoffMG,PeakRunoff,RunoffCoeff,coords
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
S1,Raingage2,O1,0.5,20.0,300.0,5.0,0,0.15,0.41,1.27,5.08,70,OUTLET,10.1,0.0,0.0,5.91,1.95,2.17,4.12,0.02,0.02,0.407,"[(777180.0, 592590.0), (777180.0, 592585.0), (777175.0, 592585.0), (777175.0, 592590.0), (777180.0, 592590.0)]"
S10,Raingage2,O1,1.87,45.0,136.75,15.0,0,0.013,0.15,1.27,5.08,90,OUTLET,10.1,0.0,0.0,4.13,4.5,1.43,5.93,0.11,0.09,0.587,
S100,Raingage2,O1,1.71,45.0,130.77,10.0,0,0.013,0.15,1.27,5.08,90,OUTLET,10.1,0.0,0.0,4.18,4.5,1.38,5.87,0.1,0.08,0.581,
S1000,Raingage2,O1,0.68,10.0,82.46,5.09,0,0.15,0.41,1.27,5.08,10,OUTLET,10.1,0.0,0.0,7.73,0.9,1.36,2.26,0.02,0.01,0.224,
S1001,Raingage2,O1,0.26,83.33,50.99,45.0,0,0.013,0.05,1.27,5.08,80,OUTLET,10.1,0.0,0.0,1.04,8.23,0.65,8.88,0.02,0.02,0.879,


### Drop unused columns

In [6]:
subcatchments.drop(['coords', 'RouteTo'], axis=1, inplace=True)
subcatchments

Unnamed: 0_level_0,Raingage,Outlet,Area,PercImperv,Width,PercSlope,CurbLength,N-Imperv,N-Perv,S-Imperv,S-Perv,PctZero,TotalPrecip,TotalRunon,TotalEvap,TotalInfil,ImpervRunoff,PervRunoff,TotalRunoffIn,TotalRunoffMG,PeakRunoff,RunoffCoeff
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
S1,Raingage2,O1,0.50,20.00,300.00,5.00,0,0.150,0.41,1.27,5.08,70,10.10,0.00,0.00,5.91,1.95,2.17,4.12,0.02,0.02,0.407
S10,Raingage2,O1,1.87,45.00,136.75,15.00,0,0.013,0.15,1.27,5.08,90,10.10,0.00,0.00,4.13,4.50,1.43,5.93,0.11,0.09,0.587
S100,Raingage2,O1,1.71,45.00,130.77,10.00,0,0.013,0.15,1.27,5.08,90,10.10,0.00,0.00,4.18,4.50,1.38,5.87,0.10,0.08,0.581
S1000,Raingage2,O1,0.68,10.00,82.46,5.09,0,0.150,0.41,1.27,5.08,10,10.10,0.00,0.00,7.73,0.90,1.36,2.26,0.02,0.01,0.224
S1001,Raingage2,O1,0.26,83.33,50.99,45.00,0,0.013,0.05,1.27,5.08,80,10.10,0.00,0.00,1.04,8.23,0.65,8.88,0.02,0.02,0.879
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
S995,Raingage2,O1,1.10,10.00,104.88,15.00,0,0.150,0.41,1.27,5.08,10,10.10,0.00,0.00,7.53,0.90,1.56,2.46,0.03,0.02,0.243
S996,Raingage2,O1,1.89,5.09,137.48,15.00,0,0.060,0.17,1.27,5.08,10,10.10,0.00,0.00,7.55,0.46,2.04,2.49,0.05,0.03,0.247
S997,Raingage2,O1,1.19,10.00,109.09,5.09,0,0.150,0.41,1.27,5.08,10,10.10,0.00,0.00,7.91,0.90,1.18,2.08,0.02,0.01,0.206
S998,Raingage2,O1,0.05,65.00,22.36,61.67,0,0.013,0.05,1.27,5.08,80,10.10,0.00,0.00,2.18,6.43,1.37,7.81,0.00,0.00,0.773


## Get categories

In [8]:
categories = model.inp.tags
categories

Unnamed: 0_level_0,Name,Tag
ElementType,Unnamed: 1_level_1,Unnamed: 2_level_1
Subcatch,S1,#comment
Subcatch,S2,compact_urban_development
Subcatch,S3,loose_soil
Subcatch,S4,wooded_area
Subcatch,S5,loose_urban_development
...,...,...
Subcatch,S997,grassy
Subcatch,S998,steep_area
Subcatch,S999,urban
Subcatch,S1000,grassy


In [12]:
# Reset the index of subcatchments DataFrame
subcatchments.reset_index(inplace=True)

# Merge the two DataFrames on the "Name" column
merged_df = subcatchments.merge(model.inp.tags, left_on="Name", right_on="Name", how="left")

# Rename the 'Tag' column to 'categories'
merged_df.rename(columns={"Tag": "categories"}, inplace=True)

# Set the index back to "Name"
merged_df.set_index("Name", inplace=True)

# Display the merged DataFrame


ValueError: cannot insert level_0, already exists

In [23]:
merged_df[:10]

Unnamed: 0_level_0,level_0,index,Raingage,Outlet,Area,PercImperv,Width,PercSlope,CurbLength,N-Imperv,N-Perv,S-Imperv,S-Perv,PctZero,TotalPrecip,TotalRunon,TotalEvap,TotalInfil,ImpervRunoff,PervRunoff,TotalRunoffIn,TotalRunoffMG,PeakRunoff,RunoffCoeff,categories
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
S1,0,0,Raingage2,O1,0.5,20.0,300.0,5.0,0,0.15,0.41,1.27,5.08,70,10.1,0.0,0.0,5.91,1.95,2.17,4.12,0.02,0.02,0.407,#comment
S10,1,1,Raingage2,O1,1.87,45.0,136.75,15.0,0,0.013,0.15,1.27,5.08,90,10.1,0.0,0.0,4.13,4.5,1.43,5.93,0.11,0.09,0.587,compact_urban_development
S100,2,2,Raingage2,O1,1.71,45.0,130.77,10.0,0,0.013,0.15,1.27,5.08,90,10.1,0.0,0.0,4.18,4.5,1.38,5.87,0.1,0.08,0.581,compact_urban_development
S1000,3,3,Raingage2,O1,0.68,10.0,82.46,5.09,0,0.15,0.41,1.27,5.08,10,10.1,0.0,0.0,7.73,0.9,1.36,2.26,0.02,0.01,0.224,grassy
S1001,4,4,Raingage2,O1,0.26,83.33,50.99,45.0,0,0.013,0.05,1.27,5.08,80,10.1,0.0,0.0,1.04,8.23,0.65,8.88,0.02,0.02,0.879,steep_area
S101,5,5,Raingage2,O1,0.94,22.5,96.95,5.09,0,0.013,0.41,1.27,5.08,70,10.1,0.0,0.0,6.66,2.19,1.17,3.36,0.03,0.02,0.333,loose_urban_development
S102,6,6,Raingage2,O1,1.56,22.5,124.9,2.33,0,0.013,0.41,1.27,5.08,70,10.1,0.0,0.0,7.01,2.19,0.82,3.01,0.05,0.03,0.298,loose_urban_development
S103,7,7,Raingage2,O1,1.92,5.09,138.56,15.0,0,0.06,0.17,1.27,5.08,10,10.1,0.0,0.0,7.56,0.46,2.03,2.49,0.05,0.03,0.246,loose_soil
S104,8,8,Raingage2,O1,1.81,10.0,134.54,15.0,0,0.15,0.41,1.27,5.08,10,10.1,0.0,0.0,7.69,0.9,1.4,2.29,0.04,0.02,0.227,grassy
S105,9,9,Raingage2,O1,1.71,10.0,130.77,5.09,0,0.15,0.41,1.27,5.08,10,10.1,0.0,0.0,8.02,0.9,1.07,1.96,0.03,0.02,0.194,grassy


merged_df.head()### Empty cells

In [46]:
subcatchments.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1001 entries, S1 to S999
Data columns (total 22 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Raingage       1001 non-null   object 
 1   Outlet         1001 non-null   object 
 2   Area           1001 non-null   float64
 3   PercImperv     1001 non-null   float64
 4   Width          1001 non-null   float64
 5   PercSlope      1001 non-null   float64
 6   CurbLength     1001 non-null   int64  
 7   N-Imperv       1001 non-null   float64
 8   N-Perv         1001 non-null   float64
 9   S-Imperv       1001 non-null   float64
 10  S-Perv         1001 non-null   float64
 11  PctZero        1001 non-null   int64  
 12  TotalPrecip    1001 non-null   object 
 13  TotalRunon     1001 non-null   object 
 14  TotalEvap      1001 non-null   object 
 15  TotalInfil     1001 non-null   object 
 16  ImpervRunoff   1001 non-null   float64
 17  PervRunoff     1001 non-null   object 
 18  TotalRunoffI