In [1]:
### IMPORTS
import os
import csv
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
### Get dataset from csv files in a folder

def get_dataset_label(PATH, n_features=2400):
    """Load every sample file in a folder into a feature matrix plus labels.

    Each file is read with ``pd.read_csv``. All columns except ``'timestamp'``
    are laid end to end (column after column) to form one feature row. The
    label of a row is characters 7-9 of its file name.

    Parameters
    ----------
    PATH : str
        Folder that holds the sample files. A trailing separator is optional.
    n_features : int, default 2400
        Number of values every flattened sample must contain.

    Returns
    -------
    dataset : np.ndarray of shape (n_files, n_features), dtype float
    label : np.ndarray of shape (n_files,)
        Label strings, in the same row order as ``dataset``.

    Raises
    ------
    ValueError
        If a file does not flatten to exactly ``n_features`` values.
    """
    rows = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the label alignment) reproducible between runs.
    for filename in sorted(os.listdir(PATH)):
        file_path = os.path.join(PATH, filename)

        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which
        # cannot be parsed as CSV.
        if not os.path.isfile(file_path):
            continue

        df = pd.read_csv(file_path)

        columns_to_concatenate = [col for col in df.columns if col != 'timestamp']

        concatenated_row = pd.concat(
            [df[col] for col in columns_to_concatenate], ignore_index=True
        ).to_numpy(dtype=float)

        if concatenated_row.shape[0] != n_features:
            raise ValueError(
                f"{filename}: expected {n_features} values per sample, "
                f"got {concatenated_row.shape[0]}"
            )

        rows.append(concatenated_row)
        labels.append(filename[7:10])

    if not rows:
        return np.empty((0, n_features), dtype=float), np.array([])

    # Stack once at the end instead of re-allocating the matrix per file.
    return np.vstack(rows), np.array(labels)


In [3]:
### Load the training and validation sets from their folders

DATASET_PATH = 'data1/training_samples/'
# Defined here (not only in a later cell) so this cell runs on a fresh kernel.
VALIDATIONSET_PATH = 'data1/validation_samples/'

train_set, train_label = get_dataset_label(DATASET_PATH)
validation_set, validation_label = get_dataset_label(VALIDATIONSET_PATH)


In [4]:
### Gas-type classifier: support vector machine with a linear kernel
# (swap kernel to 'rbf' if the classes turn out not to be linearly separable)
clf = svm.SVC(kernel='linear')

# Fit on the flattened training samples and their gas labels
clf.fit(X=train_set, y=train_label)

In [5]:
# Classify the validation samples and score the result
validation_pred = clf.predict(X=validation_set)
accuracy = accuracy_score(y_true=validation_label, y_pred=validation_pred)

# Expected labels first, predicted labels second, for a side-by-side check
print(validation_label)
print(validation_pred)

# Accuracy as computed by accuracy_score on the validation labels
print("Accuracy:", accuracy)

['GCO' 'GCO' 'GEa' 'GEa' 'GEy' 'GEy' 'GMe' 'GMe']
['GCO' 'GCO' 'GEa' 'GEa' 'GEy' 'GEy' 'GMe' 'GMe']
Accuracy: 1.0


In [7]:
### Regression model

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Concentration tables, one per gas, ten levels each.
# GEy / GEa rise in steps of 12.5; GCO / GMe rise in steps of 25.0.
GEy_conc = [12.5 * level for level in range(1, 11)]
GEa_conc = [12.5 * level for level in range(1, 11)]
GCO_conc = [25.0 * level for level in range(1, 11)]
GMe_conc = [25.0 * level for level in range(1, 11)]

def get_concentration(filename):
    """Look up the concentration encoded in a sample file name.

    Characters 7-9 of ``filename`` select the gas table (``GEy_conc``,
    ``GEa_conc``, ``GCO_conc`` or ``GMe_conc``). Characters 12-14 are parsed
    as a number; that number divided by 10, truncated, minus 1 is the index
    into the selected table.

    Returns
    -------
    float
        The table entry for the decoded gas and level.
    str
        The literal ``'error'`` when the gas code is not one of the four
        known codes (kept for backward compatibility).

    Raises
    ------
    ValueError
        If characters 12-14 are not numeric.
    IndexError
        If the decoded level falls outside the table.
    """
    gas_label = filename[7:10]
    conc_idx = int(float(filename[12:15]) / 10) - 1

    table = {
        'GEy': GEy_conc,
        'GEa': GEa_conc,
        'GCO': GCO_conc,
        'GMe': GMe_conc,
    }.get(gas_label)

    if table is None:
        return 'error'

    # A level field below 10 yields index -1, which Python would silently
    # wrap to the LAST table entry; reject it like any other bad index.
    if conc_idx < 0:
        raise IndexError(
            f"concentration level {filename[12:15]!r} in {filename!r} is below the first table entry"
        )

    return table[conc_idx]

def get_dataset_concentrations(PATH, GAS_NAME, n_features=2400):
    """Load the samples of one gas from a folder, with their concentrations.

    Only files whose name has ``GAS_NAME`` at characters 7-9 are used. Each
    such file is read with ``pd.read_csv``. All columns except
    ``'timestamp'`` are laid end to end (column after column) to form one
    feature row. The target of a row is ``get_concentration(filename)``.

    Parameters
    ----------
    PATH : str
        Folder that holds the sample files. A trailing separator is optional.
    GAS_NAME : str
        Three-character gas code to select.
    n_features : int, default 2400
        Number of values every flattened sample must contain.

    Returns
    -------
    dataset : np.ndarray of shape (n_files, n_features), dtype float
    concentrations : np.ndarray of shape (n_files,)
        Targets, in the same row order as ``dataset``.

    Raises
    ------
    ValueError
        If a file does not flatten to exactly ``n_features`` values.
    """
    rows = []
    targets = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the target alignment) reproducible between runs.
    for filename in sorted(os.listdir(PATH)):
        if filename[7:10] != GAS_NAME:
            continue

        file_path = os.path.join(PATH, filename)

        # Skip sub-directories, which cannot be parsed as CSV.
        if not os.path.isfile(file_path):
            continue

        targets.append(get_concentration(filename))

        df = pd.read_csv(file_path)

        columns_to_concatenate = [col for col in df.columns if col != 'timestamp']

        concatenated_row = pd.concat(
            [df[col] for col in columns_to_concatenate], ignore_index=True
        ).to_numpy(dtype=float)

        if concatenated_row.shape[0] != n_features:
            raise ValueError(
                f"{filename}: expected {n_features} values per sample, "
                f"got {concatenated_row.shape[0]}"
            )

        rows.append(concatenated_row)

    if not rows:
        return np.empty((0, n_features), dtype=float), np.array([])

    # Stack once at the end instead of re-allocating the matrix per file.
    return np.vstack(rows), np.array(targets)


In [8]:
### Load per-gas training and validation sets for the regression models

DATASET_PATH = 'data1/training_samples/'
VALIDATIONSET_PATH = 'data1/validation_samples/'

# Training samples, one (features, concentrations) pair per gas
(
    (gco_set, gco_concentrations),
    (gea_set, gea_concentrations),
    (gey_set, gey_concentrations),
    (gme_set, gme_concentrations),
) = [
    get_dataset_concentrations(DATASET_PATH, gas)
    for gas in ('GCO', 'GEa', 'GEy', 'GMe')
]

# Validation samples, same gas order
(
    (gco_validation_set, gco_validation_concentrations),
    (gea_validation_set, gea_validation_concentrations),
    (gey_validation_set, gey_validation_concentrations),
    (gme_validation_set, gme_validation_concentrations),
) = [
    get_dataset_concentrations(VALIDATIONSET_PATH, gas)
    for gas in ('GCO', 'GEa', 'GEy', 'GMe')
]


In [9]:
# One independent linear-kernel SVR per gas
# (swap kernel to 'rbf' if concentration turns out to be non-linear in the features)
gco_regressor, gea_regressor, gey_regressor, gme_regressor = (
    SVR(kernel='linear') for _ in range(4)
)

# Fit each regressor on the training samples of its own gas only
gco_regressor.fit(X=gco_set, y=gco_concentrations)
gea_regressor.fit(X=gea_set, y=gea_concentrations)
gey_regressor.fit(X=gey_set, y=gey_concentrations)
gme_regressor.fit(X=gme_set, y=gme_concentrations)


In [11]:
# Per-gas validation: the classifier's label for each sample, the regressor's
# concentration estimate, and the mean squared error against the true values.

# GCO
gco_validation_pred = clf.predict(X=gco_validation_set)
gco_predicted_concentrations = gco_regressor.predict(X=gco_validation_set)
gco_mse = mean_squared_error(y_true=gco_validation_concentrations,
                             y_pred=gco_predicted_concentrations)

# GEa
gea_validation_pred = clf.predict(X=gea_validation_set)
gea_predicted_concentrations = gea_regressor.predict(X=gea_validation_set)
gea_mse = mean_squared_error(y_true=gea_validation_concentrations,
                             y_pred=gea_predicted_concentrations)

# GEy
gey_validation_pred = clf.predict(X=gey_validation_set)
gey_predicted_concentrations = gey_regressor.predict(X=gey_validation_set)
gey_mse = mean_squared_error(y_true=gey_validation_concentrations,
                             y_pred=gey_predicted_concentrations)

# GMe
gme_validation_pred = clf.predict(X=gme_validation_set)
gme_predicted_concentrations = gme_regressor.predict(X=gme_validation_set)
gme_mse = mean_squared_error(y_true=gme_validation_concentrations,
                             y_pred=gme_predicted_concentrations)


# Report, one paragraph per gas
print("GCO predict labels:", gco_validation_pred)
print("GCO predict concentrations:", gco_predicted_concentrations)
print("GCO actual concentrations:", gco_validation_concentrations)
print("GCO Mean Squared Error:", gco_mse)
print("\n")

print("GEa predict labels:", gea_validation_pred)
print("GEa predict concentrations:", gea_predicted_concentrations)
print("GEa actual concentrations:", gea_validation_concentrations)
print("GEa Mean Squared Error:", gea_mse)
print("\n")

print("GEy predict labels:", gey_validation_pred)
print("GEy predict concentrations:", gey_predicted_concentrations)
print("GEy actual concentrations:", gey_validation_concentrations)
print("GEy Mean Squared Error:", gey_mse)
print("\n")

print("GMe predict labels:", gme_validation_pred)
print("GMe predict concentrations:", gme_predicted_concentrations)
print("GMe actual concentrations:", gme_validation_concentrations)
print("GMe Mean Squared Error:", gme_mse)


# Evaluate the performance of the model using Mean Squared Error (MSE)
# mse = mean_squared_error(validation_concentrations, validation_pred)
# print("Mean Squared Error:", mse)

GCO predict labels: ['GCO' 'GCO']
GCO predict concentrations: [102.85411069 182.26770193]
GCO actual concentrations: [ 75. 175.]
GCO Mean Squared Error: 414.3354867484866


GEa predict labels: ['GEa' 'GEa']
GEa predict concentrations: [45.29962325 95.08145683]
GEa actual concentrations: [37.5 87.5]
GEa Mean Squared Error: 59.156305284212905


GEy predict labels: ['GEy' 'GEy']
GEy predict concentrations: [38.37933519 83.15720355]
GEy actual concentrations: [37.5 87.5]
GEy Mean Squared Error: 9.816555704387106


GMe predict labels: ['GMe' 'GMe']
GMe predict concentrations: [ 65.49883046 186.91628784]
GMe actual concentrations: [ 75. 175.]
GMe Mean Squared Error: 116.13506917055633
