In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import os, glob

# Importing the trained model

In [2]:
# Load the pre-trained classifier that the prediction cells below use as `model`.
# SECURITY: pickle.load can execute arbitrary code stored in the file, so only
# load an 'rf.pkl' that comes from a trusted source.
# The relative path is resolved against the working directory in effect at the
# moment this cell runs.
with open('rf.pkl', 'rb') as file:  
    model = pickle.load(file)

Write the full path of the folder in which the spectra you want to analyse are stored

In [3]:
# Directory that holds the unknown spectra; the loading cell scans it for *.CSV files.
# NOTE(review): this is an absolute, machine-specific path - edit it before running
# the notebook on another computer.
# The commented-out `folder` line is the (currently disabled) alternative of giving
# only a folder name instead of the full path.
##folder = 'tundmatud' 
file_dir = 'C:\\Users\\Karl\\Nextcloud\\IT_opingud\\IR projekt\\tundmatud' ## Full path

Importing the files from the specified folder

In [4]:
# Collect the names of all spectrum files in file_dir and load them into one dataframe
#file_dir = os.path.join(os.getcwd(), folder)  ## alternative when only the folder name is provided, not the full path

# The files are matched and read through their full path instead of calling
# os.chdir(file_dir): changing the working directory would break every relative
# path used elsewhere in the notebook (e.g. re-running the cell that opens 'rf.pkl').
# glob.escape keeps special characters in the folder name from acting as wildcards,
# and sorting makes the file order independent of the file system.
filelist = sorted(
    os.path.basename(path)
    for path in glob.glob(os.path.join(glob.escape(file_dir), '*.CSV'))
)
if not filelist:
    # fail here with a clear message rather than later inside the feature/predict cells
    raise FileNotFoundError('No *.CSV files found in ' + file_dir)

df = pd.DataFrame()         # dataframe the data is written into, one column per file

for filename in filelist:
    # column 0 of the ';'-separated file becomes the index; the first remaining
    # column is stored under the file's name
    fileframe = pd.read_csv(os.path.join(file_dir, filename), header=None, sep=';', index_col=0)
    df[filename] = fileframe.iloc[:, 0]

## Replace all zeros with NaN's, then transpose so that every row is one file
df = df.replace(0, np.nan).T

In [6]:
# Function to calculate difference from global mean
def calc_globaldif(dfin):
    """Return every row of `dfin` expressed as its offset from that row's own mean.

    NaN's are first filled in by `fillin`.  The result is a float dataframe with
    the same index and columns as `dfin`.
    """
    filled = fillin(dfin)

    # empty frame with identical labels, populated one row at a time
    centred = pd.DataFrame(index=filled.index, columns=filled.columns)
    for pos in range(filled.shape[0]):
        row = filled.iloc[pos, :]
        centred.iloc[pos, :] = row - row.mean()

    return centred.astype(float)


# Function to calculate difference from local average
def calc_localdif(dfin, n=10):
    """Return each value's offset from the mean of its neighbouring columns.

    For column position j the neighbourhood is the slice j-n .. j+n-1 of the same
    row, i.e. 2*n values; position j+n itself is not part of the slice.  The
    first and last n columns are never assigned and come back as NaN.
    NaN's are first filled in by `fillin`.  The result is a float dataframe with
    the same index and columns as `dfin`.
    """
    filled = fillin(dfin)
    n_rows, n_cols = filled.shape

    # empty frame with identical labels, populated cell by cell
    localdif = pd.DataFrame(index=filled.index, columns=filled.columns)

    for r in range(n_rows):
        row = filled.iloc[r, :]
        for c in range(n, n_cols - n):
            neighbourhood = row.iloc[c - n:c + n]
            localdif.iloc[r, c] = row.iloc[c] - neighbourhood.mean()

    return localdif.astype(float)


# Function to calculate angles for line
def calc_angles(dfin, n=10, fill_col=104):
    """Return, per column, the angle (in degrees) of the line through the values n columns either side.

    For column position j the angle is arctan(dy / dx) * 180 / pi with
    dy = value[j-n] - value[j+n] and dx = -n * 0.2.  The first and last n
    columns are left as NaN.

    Before the angles are computed, NaN's are filled in by `fillin` and the NaN's
    that still lead a row (everything before its first valid value) are replaced
    by that row's value at column position `fill_col`.

    Parameters
    ----------
    dfin : DataFrame with one spectrum per row.
    n : number of columns between position j and each of the two points used.
    fill_col : column position whose value replaces a row's leading NaN's
        (default 104, the position that used to be hard-coded).

    Returns
    -------
    Float DataFrame with the same index and columns as `dfin`.

    Raises
    ------
    IndexError
        If some row has leading NaN's and `fill_col` is outside the frame.
    """
    filled = fillin(dfin)
    n_cols = filled.shape[1]
    dx = -n * 0.2    # approximation for delta x (acts like a scale factor)

    values = filled.to_numpy(dtype=float)

    # Replace all the leading NaN's.  This is done on a separate array because an
    # in-place mask on a temporary row such as `filled.iloc[i, :]` only reaches the
    # frame when pandas happens to hand out a view; working on the array also keeps
    # the fill from leaking into the caller's dataframe.
    leading = filled.ffill(axis=1).isna().to_numpy()
    if leading.any():
        # fill_col is only looked up when there is something to fill, so frames
        # without leading NaN's may be narrower than fill_col
        values = np.where(leading, values[:, [fill_col]], values)

    angles = np.full(values.shape, np.nan)
    if n_cols > 2 * n:
        # all positions n .. n_cols-n-1 at once: value[j-n] - value[j+n]
        dy = values[:, :n_cols - 2 * n] - values[:, 2 * n:]
        angles[:, n:n_cols - n] = np.arctan(dy / dx) * 180 / np.pi

    return pd.DataFrame(angles, index=filled.index, columns=filled.columns)

# Function to fill in NAN values in the Spectra using linear interpolation
# NOTE: a second definition of `fillin` appears later in this cell and is the one
# in effect at call time; keep the two in sync or remove one of them.
def fillin(dfin):
    """Return a new dataframe in which the NaN's of every row are filled by linear interpolation.

    `dfin` itself is left untouched, so the caller keeps its raw data and calling
    the function repeatedly on the same frame always starts from the same input.
    Per the pandas documentation the default fill direction is forward, so NaN's
    ahead of a row's first valid value stay NaN.
    """
    # fill in NaN's in the middle - use linear interpolation along each row
    return dfin.interpolate(method="linear", axis=1)


# Function to standardize df row values
# standardization: subtract the mean value and divide with std
# output will be the distance from mean in std units
def standardize(dfin):
    """Return `dfin` with every row rescaled to (value - row mean) / row std.

    Mean and standard deviation are the pandas Series defaults, taken over the
    row itself.  The result is a float dataframe with the same index and
    columns as `dfin`.
    """
    scaled = pd.DataFrame(index=dfin.index, columns=dfin.columns)

    for pos in range(dfin.shape[0]):
        row = dfin.iloc[pos, :]
        centre = row.mean()
        spread = row.std()
        scaled.iloc[pos, :] = (row - centre) / spread

    return scaled.astype(float)


# Function to normilize df row values
# using min max values output will be rescaled to 0...1
def normilize(dfin):
    """Return `dfin` with every row min-max rescaled: (value - row min) / (row max - row min).

    A row whose values are all equal has a zero denominator and therefore does
    not produce finite numbers.  The result is a float dataframe with the same
    index and columns as `dfin`.
    """
    rescaled = pd.DataFrame(index=dfin.index, columns=dfin.columns)

    for pos in range(dfin.shape[0]):
        row = dfin.iloc[pos, :]
        low = row.min()
        high = row.max()
        rescaled.iloc[pos, :] = (row - low) / (high - low)

    return rescaled.astype(float)

def cut(df):
    """Keep only the column ranges of interest and join them side by side.

    Each (start, end) pair below is handed to `selectRange`; the three
    selections are concatenated column-wise in the order listed.
    """
    ## Ranges of interest selected from the initial spectra
    windows = ((600, 1800), (2200, 2300), (2600, 3800))
    pieces = [selectRange(df, start, end) for start, end in windows]
    return pd.concat(pieces, axis=1)


def selectRange(df, range_start, range_end): ## df contains label column as the last column
    """Return the columns of `df` whose name, rounded to a whole number, lies in [range_start, range_end).

    Column names are converted with float() before rounding.  The last column is
    never considered, whatever its name.
    NOTE(review): skipping the last column assumes it holds labels - verify that
    every caller really passes such a column.
    """
    span = range(range_start, range_end)
    keep = []
    for name in df.columns[:-1]:
        if round(float(name), 0) in span:
            keep.append(name)
    return df[keep]


# Function to fill in NAN values in the Spectra using linear interpolation
# NOTE: `fillin` is also defined earlier in this cell; this later definition is
# the one in effect at call time.
def fillin(dfin):
    """Return a new dataframe in which the NaN's of every row are filled by linear interpolation.

    `dfin` itself is left untouched, so the caller keeps its raw data and calling
    the function repeatedly on the same frame always starts from the same input.
    Per the pandas documentation the default fill direction is forward, so NaN's
    ahead of a row's first valid value stay NaN.
    """
    # fill in NaN's in the middle - use linear interpolation along each row
    return dfin.interpolate(method="linear", axis=1)


# Calculating the features for the imported spectra

In [7]:
# Window sizes (in columns) handed to the local-difference and angle features.
# NOTE(review): presumably these have to match the values used when the model in
# rf.pkl was trained - confirm before changing them.
LOCALDIF_WINDOW = 40
ANGLE_WINDOW = 50

# Every feature extractor gets its own copy of the raw spectra: the calc_* helpers
# may modify the frame they are handed while filling NaN's, so sharing `df` would
# let a later call - or a re-run of this cell - start from already altered data.
df_glo = cut(standardize(calc_globaldif(df.copy())))
df_loc = cut(standardize(calc_localdif(df.copy(), LOCALDIF_WINDOW)))
df_ang = cut(standardize(calc_angles(df.copy(), ANGLE_WINDOW)))

# one row per spectrum: global, local and angle features side by side
df_all = pd.concat([df_glo, df_loc, df_ang], axis=1)


# Predicting the type of spectra

In [8]:
# Class codes returned by the model -> fibre names shown to the user
labels = {'1.1': 'wool',
          '1.5': 'cotton',
          '1.6': 'linen',
          '1.9': 'jute',
          '1.11': 'silk',
          '2.1': 'viscose',
          '2.3': 'acetate',
          '2.6': 'tencel',
          '3.1': 'polyester',
          '3.2': 'polyamide',
          '3.5': 'polyacrylic',
          '3.11': 'elastane',
          '3.12': 'polyethylene',
          '3.13': 'polypropylene'}

## Performing the classification
results = model.predict(df_all)

## Printing out the results
# Each prediction is paired with the row label of df_all (the file the row was
# built from), so the printed name always belongs to the row that was classified.
# A code missing from `labels` is printed as-is instead of raising a KeyError.
for name, code in zip(df_all.index, results):
    print(name + ' is predicted to be the spectra of ' + labels.get(code, str(code)))

Liili5_HPeets okt2018_test20002.CSV is predicted to be the spectra of viscose
Liili5_HPeets okt2018_test30004.CSV is predicted to be the spectra of cotton
Liili5_HPeets okt2018_test40003.CSV is predicted to be the spectra of silk
Liili5_HPeets okt2018_test50004.CSV is predicted to be the spectra of silk


In [10]:
# Class membership probabilities for the rows of df_all, one output row per input row.
# NOTE(review): for scikit-learn classifiers the columns follow the order of
# model.classes_ - confirm this before reading a column as a particular fibre.
model.predict_proba(df_all)

array([[0.00666667, 0.01166667, 0.09833333, 0.22      , 0.065     ,
        0.585     , 0.        , 0.        , 0.00166667, 0.        ,
        0.01      , 0.00166667],
       [0.00666667, 0.00833333, 0.44166667, 0.135     , 0.075     ,
        0.32833333, 0.00166667, 0.        , 0.00166667, 0.        ,
        0.00166667, 0.        ],
       [0.01166667, 0.96      , 0.00333333, 0.00166667, 0.        ,
        0.        , 0.00166667, 0.00166667, 0.005     , 0.00333333,
        0.00666667, 0.005     ],
       [0.05      , 0.90666667, 0.00166667, 0.00166667, 0.        ,
        0.00833333, 0.        , 0.00333333, 0.005     , 0.005     ,
        0.015     , 0.00333333]])