In [1]:
### IMPORTS
import os
import csv
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [2]:
### Get dataset from csv files in a folder

GEy_conc = [12.5,25.0,37.5,50.0,62.5,75.0,87.5,100.0,112.5,125.0]
GEa_conc = [12.5,25.0,37.5,50.0,62.5,75.0,87.5,100.0,112.5,125.0]
GCO_conc = [25.0,50.0,75.0,100.0,125.0,150.0,175.0,200.0,225.0,250.0]
GMe_conc = [25.0,50.0,75.0,100.0,125.0,150.0,175.0,200.0,225.0,250.0]

def get_dataset_label(PATH):
    label = np.array([])
    dataset = np.empty((0,2400), dtype=float)
    
    for filename in os.listdir(PATH):
        label = np.append(label, filename[7:10])

        df = pd.read_csv(PATH+filename)

        columns_to_concatenate = [col for col in df.columns if col != 'timestamp']

        concatenated_row = pd.concat([df[col] for col in columns_to_concatenate], ignore_index=True)

        dataset = np.concatenate([dataset, [concatenated_row]])
        
    return dataset, label

def get_concentration(filename):
    gas_label = filename[7:10]
    conc_idx = int(float(filename[12:15])/10) - 1
    if gas_label == 'GEy':
        return GEy_conc[conc_idx]
    elif gas_label == 'GEa':
        return GEa_conc[conc_idx]
    elif gas_label == 'GCO':
        return GCO_conc[conc_idx]
    elif gas_label == 'GMe':
        return GMe_conc[conc_idx]
    return 'error'

def get_dataset_concentrations(PATH, GAS_NAME):
    concentrations = np.array([])
    dataset = np.empty((0,2400), dtype=float)
    
    for filename in os.listdir(PATH):
        if filename[7:10] == GAS_NAME:
            concentrations = np.append(concentrations, get_concentration(filename))

            df = pd.read_csv(PATH+filename)

            columns_to_concatenate = [col for col in df.columns if col != 'timestamp']

            concatenated_row = pd.concat([df[col] for col in columns_to_concatenate], ignore_index=True)

            dataset = np.concatenate([dataset, [concatenated_row]])
        
    return dataset, concentrations


In [3]:
### Split the data into training and testing sets

DATASET_PATH = 'data1/training_samples/'

## Classifier data
gas_sample, labels = get_dataset_label(DATASET_PATH)
gas_sample_train, gas_sample_test, labels_train, labels_test = train_test_split(gas_sample, labels, test_size=0.2, random_state=42)

## Regression data
gco_sample, gco_conc = get_dataset_concentrations(DATASET_PATH, 'GCO')
gea_sample, gea_conc = get_dataset_concentrations(DATASET_PATH, 'GEa')
gey_sample, gey_conc = get_dataset_concentrations(DATASET_PATH, 'GEy')
gme_sample, gme_conc = get_dataset_concentrations(DATASET_PATH, 'GMe')

gco_sample_train, gco_sample_test, gco_conc_train, gco_conc_test = train_test_split(gco_sample, gco_conc, test_size=0.25, random_state=42)
gea_sample_train, gea_sample_test, gea_conc_train, gea_conc_test = train_test_split(gea_sample, gea_conc, test_size=0.25, random_state=42)
gey_sample_train, gey_sample_test, gey_conc_train, gey_conc_test = train_test_split(gey_sample, gey_conc, test_size=0.25, random_state=42)
gme_sample_train, gme_sample_test, gme_conc_train, gme_conc_test = train_test_split(gme_sample, gme_conc, test_size=0.25, random_state=42)



In [4]:
### Create an SVM classifier
clf = svm.SVC(kernel='linear')  # You can change the kernel to 'rbf' for non-linear data

# Train the SVM classifier on the training data
clf.fit(gas_sample_train, labels_train)

### Create an SVM regression model
gco_regressor = SVR(kernel='linear')  # You can change the kernel to 'rbf' for non-linear data
gea_regressor = SVR(kernel='linear')  # You can change the kernel to 'rbf' for non-linear data
gey_regressor = SVR(kernel='linear')  # You can change the kernel to 'rbf' for non-linear data
gme_regressor = SVR(kernel='linear')  # You can change the kernel to 'rbf' for non-linear data

# Train the SVM regression model on the training data
gco_regressor.fit(gco_sample_train, gco_conc_train)
gea_regressor.fit(gea_sample_train, gea_conc_train)
gey_regressor.fit(gey_sample_train, gey_conc_train)
gme_regressor.fit(gme_sample_train, gme_conc_train)

In [5]:
# Make predictions on the test data
labels_pred = clf.predict(gas_sample_test)

print(labels_test)
print(labels_pred)

# Evaluate the accuracy of the model
accuracy = accuracy_score(labels_test, labels_pred)
print("Classifier Accuracy:", accuracy)
print("\n")

# Make regressions on the test data
gco_conc_pred = gco_regressor.predict(gco_sample_test)
gea_conc_pred = gea_regressor.predict(gea_sample_test)
gey_conc_pred = gey_regressor.predict(gey_sample_test)
gme_conc_pred = gme_regressor.predict(gme_sample_test)

gco_mse = mean_squared_error(gco_conc_test, gco_conc_pred)
gea_mse = mean_squared_error(gea_conc_test, gea_conc_pred)
gey_mse = mean_squared_error(gey_conc_test, gey_conc_pred)
gme_mse = mean_squared_error(gme_conc_test, gme_conc_pred)

formatted_list = [round(num, 2) for num in gco_conc_pred]
print("GCO predict concentrations:", formatted_list)
print("GCO actual concentrations:", gco_conc_test)
print("GCO Mean Squared Error:", gco_mse)
print("\n")

formatted_list = [round(num, 2) for num in gea_conc_pred]
print("GEa predict concentrations:", formatted_list)
print("GEa actual concentrations:", gea_conc_test)
print("GEa Mean Squared Error:", gea_mse)
print("\n")

formatted_list = [round(num, 2) for num in gey_conc_pred]
print("GEy predict concentrations:", formatted_list)
print("GEy actual concentrations:", gey_conc_test)
print("GEy Mean Squared Error:", gey_mse)
print("\n")

formatted_list = [round(num, 2) for num in gme_conc_pred]
print("GMe predict concentrations:", formatted_list)
print("GMe actual concentrations:", gme_conc_test)
print("GMe Mean Squared Error:", gme_mse)


['GEa' 'GEa' 'GEa' 'GEy' 'GCO' 'GEa' 'GMe' 'GEy']
['GEa' 'GEa' 'GEa' 'GEy' 'GCO' 'GEa' 'GMe' 'GEy']
Classifier Accuracy: 1.0


GCO predict concentrations: [219.48, 50.96, 143.36]
GCO actual concentrations: [225.  50. 150.]
GCO Mean Squared Error: 25.14813892264948


GEa predict concentrations: [100.89, 16.97, 72.0]
GEa actual concentrations: [112.5  25.   75. ]
GEa Mean Squared Error: 69.4090195307415


GEy predict concentrations: [114.47, 0.67, 83.3]
GEy actual concentrations: [112.5  25.   75. ]
GEy Mean Squared Error: 221.57921463633235


GMe predict concentrations: [171.53, 26.16, 170.53]
GMe actual concentrations: [225.  50. 150.]
GMe Mean Squared Error: 1283.0101900534
