<a target="_blank" href="https://colab.research.google.com/github/BuczynskiRafal/stormwater-analysis/blob/main/stormwater_analysis/data/catchment_classification_model/first_approach_classification.ipynb">

<a target="_blank" href="https://colab.research.google.com/github/BuczynskiRafal/stormwater-analysis/blob/main/stormwater_analysis/data/catchment_classification_model/first_approach_classification.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Training a model for catchment classification. 
* kategoryzacja zlewni oparta o większość dostępnych ceech zlewni z plików inp i rpt

# Imports

In [1]:
import swmmio
import pyswmm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental import preprocessing


desired_width = 500
pd.set_option("display.width", desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option("display.max_columns", 30)

In [2]:
classes = pd.DataFrame(
    data={
        "classes": [
            "compact_urban_development",
            "urban",
            "loose_urban_development",
            "wooded_area",
            "grassy",
            "loose_soil",
            "steep_area",
        ]
    }
)
classes

Unnamed: 0,classes
0,compact_urban_development
1,urban
2,loose_urban_development
3,wooded_area
4,grassy
5,loose_soil
6,steep_area


# Get files

In [3]:
INP_FILE_01 = "dataset/subcatchment_dataset_01.inp"
INP_FILE_02 = "dataset/subcatchment_dataset_02.inp"
INP_FILE_03 = "dataset/subcatchment_dataset_03.inp"
INP_FILE_04 = "dataset/subcatchment_dataset_04.inp"
INP_FILE_05 = "dataset/subcatchment_dataset_05.inp"
INP_FILE_06 = "dataset/subcatchment_dataset_06.inp"
INP_FILE_07 = "dataset/subcatchment_dataset_07.inp"
INP_FILE_08 = "dataset/subcatchment_dataset_08.inp"
INP_FILE_09 = "dataset/subcatchment_dataset_09.inp"
INP_FILE_10 = "dataset/subcatchment_dataset_10.inp"
files = [INP_FILE_01, INP_FILE_02, INP_FILE_03, INP_FILE_04, INP_FILE_05, INP_FILE_06, INP_FILE_07, INP_FILE_08, INP_FILE_09, INP_FILE_10]

# Run simulation

In [4]:
for f in files:
    with pyswmm.Simulation(f) as sim:
        for step in sim:
            pass

In [None]:
from data

# Read inp and rpt file as swmmio model object

In [5]:
model_01 = swmmio.Model(INP_FILE_01)
model_02 = swmmio.Model(INP_FILE_02)
model_03 = swmmio.Model(INP_FILE_03)
model_04 = swmmio.Model(INP_FILE_04)
model_05 = swmmio.Model(INP_FILE_05)
model_06 = swmmio.Model(INP_FILE_06)
model_07 = swmmio.Model(INP_FILE_07)
model_08 = swmmio.Model(INP_FILE_08)
model_09 = swmmio.Model(INP_FILE_09)
model_10 = swmmio.Model(INP_FILE_10)

models = [model_01, model_02, model_03, model_04, model_05, model_06, model_07, model_08, model_09, model_10]

## Get subcatchments data from the model

In [6]:
raw_subcatchments_01 = model_01.subcatchments.dataframe
raw_subcatchments_02 = model_02.subcatchments.dataframe
raw_subcatchments_03 = model_03.subcatchments.dataframe
raw_subcatchments_04 = model_04.subcatchments.dataframe
raw_subcatchments_05 = model_05.subcatchments.dataframe
raw_subcatchments_06 = model_06.subcatchments.dataframe
raw_subcatchments_07 = model_07.subcatchments.dataframe
raw_subcatchments_08 = model_08.subcatchments.dataframe
raw_subcatchments_09 = model_09.subcatchments.dataframe
raw_subcatchments_10 = model_10.subcatchments.dataframe

subcatchments_01 = raw_subcatchments_01.copy()
subcatchments_02 = raw_subcatchments_02.copy()
subcatchments_03 = raw_subcatchments_03.copy()
subcatchments_04 = raw_subcatchments_04.copy()
subcatchments_05 = raw_subcatchments_05.copy()
subcatchments_06 = raw_subcatchments_06.copy()
subcatchments_07 = raw_subcatchments_07.copy()
subcatchments_08 = raw_subcatchments_08.copy()
subcatchments_09 = raw_subcatchments_09.copy()
subcatchments_10 = raw_subcatchments_10.copy()

subcatchments = [subcatchments_01, subcatchments_02, subcatchments_03, subcatchments_04, subcatchments_05, subcatchments_06, subcatchments_07, subcatchments_08, subcatchments_09, subcatchments_10]

### Drop unused columns

In [7]:
for frame in subcatchments:
    frame.drop(['N-Imperv', 'N-Perv', 'S-Imperv',	'S-Perv', 'coords', 'RouteTo', 'CurbLength', 'Outlet', 'Raingage', 'TotalRunon', 'TotalEvap', 'TotalInfil', 'ImpervRunoff', 'PervRunoff', 'TotalRunoffIn'], axis=1, inplace=True)

In [8]:
subcatchments_01.iloc[:5, :]

Unnamed: 0_level_0,Area,PercImperv,Width,PercSlope,PctZero,TotalPrecip,TotalRunoffMG,PeakRunoff,RunoffCoeff
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
S1,0.5,20.0,300.0,5.0,70,10.1,0.02,0.02,0.407
S10,1.87,45.0,136.75,15.0,90,10.1,0.11,0.09,0.587
S100,1.71,45.0,130.77,10.0,90,10.1,0.1,0.08,0.581
S1000,0.68,10.0,82.46,5.09,10,10.1,0.02,0.01,0.224
S1001,0.26,83.33,50.99,45.0,80,10.1,0.02,0.02,0.879


## Get categories

In [9]:
categories_01 = model_01.inp.tags
categories_02 = model_02.inp.tags
categories_03 = model_03.inp.tags
categories_04 = model_04.inp.tags
categories_05 = model_05.inp.tags
categories_06 = model_06.inp.tags
categories_07 = model_07.inp.tags
categories_08 = model_08.inp.tags
categories_09 = model_09.inp.tags
categories_10 = model_10.inp.tags
categories_01

Unnamed: 0_level_0,Name,Tag
ElementType,Unnamed: 1_level_1,Unnamed: 2_level_1
Subcatch,S1,loose_soil
Subcatch,S2,compact_urban_development
Subcatch,S3,loose_soil
Subcatch,S4,wooded_area
Subcatch,S5,loose_urban_development
...,...,...
Subcatch,S997,grassy
Subcatch,S998,steep_area
Subcatch,S999,urban
Subcatch,S1000,grassy


### Add categories column to subcatchments DataFrame

In [10]:
def merge_tag(subcatchment, model):
    subcatchment.reset_index(inplace=True)
    merged = subcatchment.merge(model.inp.tags, left_on="Name", right_on="Name", how="left")
    merged.rename(columns={"Tag": "categories"}, inplace=True)
    merged.set_index("Name", inplace=True)
    return merged

merged_df_01 = merge_tag(subcatchments_01, model_01)
merged_df_02 = merge_tag(subcatchments_02, model_02)
merged_df_03 = merge_tag(subcatchments_03, model_03)
merged_df_04 = merge_tag(subcatchments_04, model_04)
merged_df_05 = merge_tag(subcatchments_05, model_05)
merged_df_06 = merge_tag(subcatchments_06, model_06)
merged_df_07 = merge_tag(subcatchments_07, model_07)
merged_df_08 = merge_tag(subcatchments_08, model_08)
merged_df_09 = merge_tag(subcatchments_09, model_09)
merged_df_10 = merge_tag(subcatchments_10, model_10)

merged = [merged_df_01, merged_df_02, merged_df_03, merged_df_04, merged_df_05, merged_df_06, merged_df_07, merged_df_08, merged_df_09, merged_df_10]

df = pd.concat(merged)
df = df.reset_index(drop=True)

In [11]:
df.head()

Unnamed: 0,Area,PercImperv,Width,PercSlope,PctZero,TotalPrecip,TotalRunoffMG,PeakRunoff,RunoffCoeff,categories
0,0.5,20.0,300.0,5.0,70,10.1,0.02,0.02,0.407,loose_soil
1,1.87,45.0,136.75,15.0,90,10.1,0.11,0.09,0.587,compact_urban_development
2,1.71,45.0,130.77,10.0,90,10.1,0.1,0.08,0.581,compact_urban_development
3,0.68,10.0,82.46,5.09,10,10.1,0.02,0.01,0.224,grassy
4,0.26,83.33,50.99,45.0,80,10.1,0.02,0.02,0.879,steep_area


### Split data into features and target

In [12]:
X = df.drop('categories', axis=1)
y = df['categories']

X["TotalPrecip"] = pd.to_numeric(X["TotalPrecip"])
# X["TotalRunon"] = pd.to_numeric(X["TotalRunon"])
# X["TotalEvap"] = pd.to_numeric(X["TotalEvap"])
# X["TotalInfil"] = pd.to_numeric(X["TotalInfil"])
# X["PervRunoff"] = pd.to_numeric(X["PervRunoff"])
# X["ImpervRunoff"] = pd.to_numeric(X["ImpervRunoff"])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(8008, 9) (8008, 7)
(2002, 9) (2002, 7)


In [13]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8008 entries, 2270 to 7270
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Area           8008 non-null   float64
 1   PercImperv     8008 non-null   float64
 2   Width          8008 non-null   float64
 3   PercSlope      8008 non-null   float64
 4   PctZero        8008 non-null   int64  
 5   TotalPrecip    8008 non-null   float64
 6   TotalRunoffMG  8008 non-null   float64
 7   PeakRunoff     8008 non-null   float64
 8   RunoffCoeff    8008 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 625.6 KB


In [14]:
X_train[:50]

Unnamed: 0,Area,PercImperv,Width,PercSlope,PctZero,TotalPrecip,TotalRunoffMG,PeakRunoff,RunoffCoeff
2270,0.02,10.0,14.14,20.0,10,38.33,0.01,0.01,0.825
8668,1.87,10.0,136.75,20.0,10,12.67,0.06,0.01,0.256
8293,0.81,10.0,90.0,20.0,10,12.67,0.03,0.01,0.28
7868,1.46,10.0,120.83,5.09,10,9.5,0.01,0.0,0.101
1095,1.61,5.09,126.89,15.0,10,20.0,0.19,0.14,0.59
3131,0.58,15.0,76.16,30.0,5,15.0,0.02,0.01,0.259
8116,1.96,10.0,140.0,5.09,10,12.67,0.05,0.01,0.214
1561,0.89,5.09,94.34,15.0,10,20.0,0.11,0.09,0.606
1670,1.52,65.0,123.29,61.67,80,20.0,0.27,0.21,0.879
9951,1.68,45.0,129.61,5.09,90,0.32,0.0,0.0,0.403


In [15]:
normalizer = preprocessing.Normalization()
normalizer.adapt(np.array(X_train))


# Build the model

In [16]:
model = Sequential()
model.add(normalizer)
model.add(Dense(units=X_train.shape[1], activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.2))
model.add(Dense(18, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=y_train.shape[1], activation='softmax'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 9)                19        
 n)                                                              
                                                                 
 dense (Dense)               (None, 9)                 90        
                                                                 
 dropout (Dropout)           (None, 9)                 0         
                                                                 
 dense_1 (Dense)             (None, 18)                180       
                                                                 
 dropout_1 (Dropout)         (None, 18)                0         
                                                                 
 dense_2 (Dense)             (None, 7)                 133       
                                                        

In [17]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    validation_split=0.2,
    verbose=1,
    batch_size=16,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10)]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100


In [18]:
# Evaluate ANN model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test set accuracy: {accuracy:.3f}")

Test set accuracy: 1.000


In [19]:
# model.predict(X_test)
y_pred = model.predict(X_test)



In [20]:
metrics = pd.DataFrame(history.history)
metrics['epoch'] = history.epoch
metrics

Unnamed: 0,loss,accuracy,val_loss,val_accuracy,epoch
0,1.420465,0.451424,1.029929,0.584915,0
1,1.008345,0.583042,0.844355,0.631868,1
2,0.879364,0.626623,0.746162,0.705295,2
3,0.789207,0.666084,0.640726,0.746753,3
4,0.69573,0.700549,0.528291,0.790709,4
5,0.617691,0.732143,0.41522,0.802198,5
6,0.538345,0.761364,0.314234,0.803696,6
7,0.461741,0.805944,0.233198,0.913586,7
8,0.397731,0.836414,0.172083,0.999001,8
9,0.347252,0.859515,0.136908,0.992508,9


# Model evaluate

In [21]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['accuracy'], name='accuracy'), row=1, col=1)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['loss'], name='loss'), row=1, col=2)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_accuracy'], name='val_accuracy'), row=1, col=1)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_loss'], name='val_loss'), row=1, col=2)

fig.update_xaxes(title_text='epochs')
fig.update_yaxes(title_text='accuracy', row=1, col=1)
fig.update_yaxes(title_text='loss', row=1, col=2)
fig.update_layout(width=1000, title='Accuracy and Loss')
fig.show()

In [22]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(test_acc)

1.0


Predykcja na podstawie modelu.
* model.evaluate(y_true, y_pred) - pozwala obliczyć metryki modelu
* model.predict_classes() - pozwala zwrócić odpowiednio przewidziane klasy
* model.predict_proba(), model.predict() - pozwala zwrócić prawdopodobieństwo danej klasy

In [23]:
predictions = model.predict(X_test)
predictions



array([[0.0000000e+00, 9.9970943e-01, 2.9058167e-04, ..., 0.0000000e+00, 0.0000000e+00, 3.1306797e-37],
       [0.0000000e+00, 9.9946195e-01, 5.3801737e-04, ..., 0.0000000e+00, 0.0000000e+00, 1.3485445e-38],
       [9.8854564e-08, 1.7786550e-10, 3.0745926e-15, ..., 3.1053079e-07, 9.9998057e-01, 1.2135111e-10],
       ...,
       [0.0000000e+00, 9.6455270e-01, 3.5447229e-02, ..., 9.1529156e-24, 1.0563549e-24, 4.8023189e-16],
       [2.5391078e-14, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00, 0.0000000e+00, 3.9153095e-31],
       [4.0277642e-16, 9.1694966e-03, 6.0890976e-08, ..., 1.1300790e-09, 7.6924792e-20, 9.9083042e-01]], dtype=float32)

In [24]:
predictions_df = pd.DataFrame(predictions)
predictions_df

Unnamed: 0,0,1,2,3,4,5,6
0,0.000000e+00,9.997094e-01,2.905817e-04,2.428208e-21,0.000000e+00,0.000000e+00,3.130680e-37
1,0.000000e+00,9.994619e-01,5.380174e-04,6.271826e-21,0.000000e+00,0.000000e+00,1.348544e-38
2,9.885456e-08,1.778655e-10,3.074593e-15,1.894081e-05,3.105308e-07,9.999806e-01,1.213511e-10
3,8.937550e-12,1.525113e-04,1.936593e-05,1.255794e-15,6.825211e-05,4.981484e-16,9.997599e-01
4,7.545330e-11,2.527345e-11,9.967129e-23,4.584423e-04,4.935291e-12,9.995416e-01,2.821707e-14
...,...,...,...,...,...,...,...
1997,7.422767e-13,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,3.365569e-36
1998,7.152917e-14,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,4.009901e-31
1999,0.000000e+00,9.645527e-01,3.544723e-02,5.631404e-12,9.152916e-24,1.056355e-24,4.802319e-16
2000,2.539108e-14,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,3.915310e-31


In [25]:
predictions_cls = predictions.argmax(axis=-1)
predictions_cls

array([1, 1, 5, ..., 1, 4, 6], dtype=int64)

# Zapisanie modelu

In [33]:
model.save(r'C:\Users\Dell\Documents\Git\stormwater-analysis\stormwater_analysis\data\catchment_classification_model\catchemnt_classifier')

INFO:tensorflow:Assets written to: C:\Users\Dell\Documents\Git\stormwater-analysis\stormwater_analysis\data\catchment_classification_model\catchemnt_classifier\assets


# Załadowanie modelu

In [2]:
from tensorflow import keras
model = keras.models.load_model(r'C:\Users\Dell\Documents\Git\stormwater-analysis\stormwater_analysis\data\catchment_classification_model\catchemnt_classifier')

In [28]:
X_test

Unnamed: 0,Area,PercImperv,Width,PercSlope,PctZero,TotalPrecip,TotalRunoffMG,PeakRunoff,RunoffCoeff
7438,1.43,5.09,119.58,10.00,10,9.50,0.01,0.00,0.063
9058,1.00,5.09,100.00,10.00,10,0.32,0.00,0.00,0.005
568,0.77,30.00,87.75,15.00,80,10.10,0.04,0.03,0.462
9344,0.60,15.00,77.46,30.00,5,0.32,0.00,0.00,0.007
2236,1.39,30.00,117.90,5.09,80,38.33,0.44,0.30,0.820
...,...,...,...,...,...,...,...,...,...
533,0.19,83.33,43.59,45.00,80,10.10,0.02,0.01,0.880
132,1.36,65.00,116.62,61.67,80,10.10,0.10,0.09,0.761
7329,0.30,10.00,54.77,20.00,10,9.50,0.00,0.00,0.127
8466,1.25,65.00,111.80,61.67,80,12.67,0.12,0.04,0.772


In [9]:
! pip install pydot




You should consider upgrading via the 'C:\Users\Dell\Documents\Git\stormwater-analysis\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [12]:
from tensorflow.keras.utils import plot_model
plot_model(model)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [13]:
pred = model.predict(X_test)
pred

NameError: name 'X_test' is not defined

In [35]:
tensorflow.version

NameError: name 'tensorflow' is not defined