diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f11b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/Data_preparation.py b/Data_preparation.py deleted file mode 100644 index 0d847a1..0000000 --- a/Data_preparation.py +++ /dev/null @@ -1,175 +0,0 @@ -import pickle -import numpy as np -import pandas as pd -from tqdm import tqdm -from keras.preprocessing.sequence import pad_sequences - -# After exporting the relational database to separate tables with .csv extension, the transformation can begin -# The first step is to read the cvs files as Dataframes -df_taskset = pd.read_csv('TaskSet.csv') # import task-sets -# print(df_taskset.head()) if you want to see how the data look like - -df_task = pd.read_csv('Task.csv') # import tasks -# print(df_task.head()) - -df_job = pd.read_csv('Job.csv') # import jobs -# print(df_job.head()) - - -# 2. data transformation - -# here starts data transformation -ntn = df_task[['PKG']].values # get values from PKG in tasks. This step is equivalent to: Select distinct PKG from Task -ntn1 = [] -for n in ntn: - ntn1.append(n[0]) -print(np.unique(ntn1)) # print the unique values - - - -# PKG has a fixed set of labels. Integer encoding is used where integer # value is assigned to each label -PKGs = {} -PKGs['pi'] = 0 -PKGs['hey'] = 1 -PKGs['tumatmul'] = 2 -PKGs['cond_mod'] = 3 - -# INteger encoding for Exit_Values from Jobs -Exit_Values = {} -Exit_Values['EXIT'] = 1 -Exit_Values['EXIT_CRITICAL'] = 0 - - -# ARG values ranged from 1 to 205.891.132.094.649, these values were normalized and scaled # to range from 1 to 17 -Arg_Values = {} -Arg_Values[1] = 1 -Arg_Values[4096] = 2 -Arg_Values[8192] = 3 -Arg_Values[16384] = 4 -Arg_Values[32768] = 5 -Arg_Values[65536] = 6 -Arg_Values[131072] = 7 -Arg_Values[262144] = 8 -Arg_Values[524288] = 9 -Arg_Values[1048576] = 10 -Arg_Values[2097152] = 11 -Arg_Values[847288609443] = 12 -Arg_Values[2541865828329] = 13 -Arg_Values[7625597484987] = 14 -Arg_Values[22876792454961] = 15 -Arg_Values[68630377364883] = 16 -Arg_Values[205891132094649] = 17 - - - -# 3. 
Features and Labels extraction -i = 0 - -features = [] # create an empty list for features -labels = [] # create an empty list for labels -# loop in the task-set -with tqdm ( total=len ( - list(df_taskset.iterrows()))) as pbar: # the total length would be total=len(list(df_taskset.iterrows())) - for index, row in df_taskset.iterrows (): - - try: - - i += 1 - grid = int(df_taskset.loc[index, 'Set_ID']) # task_set ID - first_task = int(df_taskset.loc[index, 'TASK1_ID']) # first task_id - second_task = int(df_taskset.loc[index, 'TASK2_ID']) # second task_id - third_task = int(df_taskset.loc[index, 'TASK3_ID']) # third task_id - fourth_task = int(df_taskset.loc[index, 'TASK4_ID']) # fourth task_id - tasks = [] # empty list of tasks where features are saved later - - if first_task != -1: # if the first task exists in this task-set then : - - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) # save the priority - tasks.append(int(task_info['Period']/1000)) # save the period in seconds - tasks.append(int(task_info['Number_of_Jobs'])) # save number of jobs - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) #save the numerical value of PKG - av = int(task_info['Arg'].item()) - tasks.append(Arg_Values[av]) #save the scaled value of Arg - tasks.append(int(task_info['CRITICALTIME']/1000)) # save criticaltime in seconds - # for each job in that is in the task and has this task_set id - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) # save the transformed exit value - - if second_task != -1: # if the second task exists in this task-set then : - first_task = second_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period']/1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - av = int(task_info['Arg'].item()) - tasks.append(Arg_Values[av]) - tasks.append(int(task_info['CRITICALTIME']/1000)) - print(tasks) - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - if third_task != -1: # if the third task exists in this task-set then : - first_task = third_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period']/1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - av = int(task_info['Arg'].item()) - tasks.append(Arg_Values[av]) - tasks.append(int(task_info['CRITICALTIME']/1000)) - - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - - if fourth_task != -1: # if the fourth task exists in this task-set then : - first_task = fourth_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period']/1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - av = int(task_info['Arg'].item()) - tasks.append(Arg_Values[av]) - tasks.append(int(task_info['CRITICALTIME']/1000)) - - job_info = df_job.loc[(df_job['Task_ID'] == 
first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - - tasks = np.array(tasks) # to save the task list as numpy array - features.append(tasks) # values in tasks are features - labels.append(int(df_taskset.loc[index, 'Successful'])) # in the label list, append the value in the successful col from task-set - except Exception as e: # exception handler - print(e) - pass - pbar.update(1) - - - -labels = np.array(labels) # to save the labels list as numpy array - -# To make a fixed length vector, if the vector is smaller than 56 then replace the empty values with -1. if longer than 56 trim the value -features = pad_sequences(features, maxlen=56, value=-1, padding='post', truncating='post') - -#print(features.shape) # the dimensionality of features -#print(labels.shape) # the dimensionality of labels - -# save both files for the training -with open ( '56_features', 'wb' ) as outfile: # 'wb' is the file mode, it means 'write binary' - pickle.dump(features, outfile) - -with open ( '56_labels', 'wb' ) as outfile: - pickle.dump(labels, outfile) diff --git a/README.md b/README.md index 70d2e70..c488bc9 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ The first step is to preprocess the data. The database was imported and transfor 5. Task Critical time: Integer 6. Number of Jobs: Integer From Jobs only one feature was selected: Job Exit_Value: String. -After exporting all tables, start with Data_preparation.py. Line 165 is responsible for the length of the feature vector. +After exporting all tables, start with Data_preparation.py. Features and labels are saved at the end. @@ -35,7 +35,7 @@ CuDNNLSTM.py. When using CPU, install Tensorflow and replace CuDNNLSTM with LSTM Evaluation.py. Evaluation prints the confusion matrix and classification report. Tensorboard can be launched by typing tensorboard -–logdir=logs/ into the terminal and logs from trained models can be visualized **4. Prediction:** -predictin.py. A CSV file will be save with actual and predictied values. The trained model should be loaded first. +prediction.py. A CSV file will be saved with actual and predicted values. The trained model should be loaded first. **5. Plotting:** Plotting.py. Another way to visualize the model built. diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 0000000..434703f --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,73 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# All Vagrant configuration is done below. The "2" in Vagrant.configure +# configures the configuration version (we support older styles for +# backwards compatibility). Please don't change it unless you know what +# you're doing. +Vagrant.configure("2") do |config| + # The most common configuration options are documented and commented below. + # For a complete reference, please see the online documentation at + # https://docs.vagrantup.com. + + # Every Vagrant development environment requires a box. You can search for + # boxes at https://atlas.hashicorp.com/search. + config.vm.box = "ubuntu/xenial64" + + # Disable automatic box update checking. If you disable this, then + # boxes will only be checked for updates when the user runs + # `vagrant box outdated`. This is not recommended. + # config.vm.box_check_update = false + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine. 
In the example below, + # accessing "localhost:8080" will access port 80 on the guest machine. + # config.vm.network "forwarded_port", guest: 80, host: 8080 + + # Create a private network, which allows host-only access to the machine + # using a specific IP. + # config.vm.network "private_network", ip: "192.168.33.10" + # config.vm.network "public_network", ip: "127.0.0.1", bridge: "enp0s25" + + + # Create a public network, which generally matched to bridged network. + # Bridged networks make the machine appear as another physical device on + # your network. + config.vm.network "public_network", :mac => "0A0100000000", :auto_config => false + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + # config.vm.synced_folder "../data", "/vagrant_data" + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. + # Example for VirtualBox: + # + config.vm.provider "virtualbox" do |vb| + # # Display the VirtualBox GUI when booting the machine + vb.gui = false + # + # # Customize the amount of memory on the VM: + vb.memory = 4096 + vb.cpus = 2 + end + # + # View the documentation for the provider you are using for more + # information on available options. + + # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies + # such as FTP and Heroku are also available. See the documentation at + # https://docs.vagrantup.com/v2/push/atlas.html for more information. + # config.push.define "atlas" do |push| + # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" + # end + + # Enable provisioning with a shell script. Additional provisioners such as + # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the + # documentation for more information about their specific syntax and use. 
+ config.vm.provision "shell", path:"provision.sh", privileged:false; + config.vm.provision "shell", path:"bootstrap.sh" , run:"always"; + +end diff --git a/bootstrap.sh b/bootstrap.sh new file mode 100644 index 0000000..f5e64fd --- /dev/null +++ b/bootstrap.sh @@ -0,0 +1,15 @@ + +############################## +# +# This is a bootstrap script which is +# run at every startup of the vagrant machine +# If you want to run something just once at provisioning +# and first bootup of the vagrant machine please see +# provision.sh +# +# Contributor: Bernhard Blieninger +############################## + +python3 -m venv lstm-virtenv +source lstm-virtenv/bin/activate +pip3 install -r python3-lstm/requirements.txt diff --git a/prediction.py b/prediction.py deleted file mode 100644 index 3280154..0000000 --- a/prediction.py +++ /dev/null @@ -1,153 +0,0 @@ -import pickle -import numpy as np -import pandas as pd -from tqdm import tqdm -from keras.preprocessing.sequence import pad_sequences -from keras.models import load_model -import csv - -df_taskset = pd.read_csv ( 'TaskSet.csv' ) -# df_taskset = df_taskset.sample(frac=0.0001, random_state=99) -df_task = pd.read_csv ( 'Task.csv' ) -df_job = pd.read_csv ( 'Job.csv' ) - -ntn = df_task[['PKG']].values -ntn1 = [] -for n in ntn: - ntn1.append ( n[0] ) - -PKGs = {} -PKGs['pi'] = 0 -PKGs['hey'] = 1 -PKGs['tumatmul'] = 2 -PKGs['cond_mod'] = 3 - -Exit_Values = {} -Exit_Values['EXIT'] = 1 -Exit_Values['EXIT_CRITICAL'] = 0 - -Arg_Values = {} -Arg_Values[1] = 1 -Arg_Values[4096] = 2 -Arg_Values[8192] = 3 -Arg_Values[16384] = 4 -Arg_Values[32768] = 5 -Arg_Values[65536] = 6 -Arg_Values[131072] = 7 -Arg_Values[262144] = 8 -Arg_Values[524288] = 9 -Arg_Values[1048576] = 10 -Arg_Values[2097152] = 11 -Arg_Values[847288609443] = 12 -Arg_Values[2541865828329] = 13 -Arg_Values[7625597484987] = 14 -Arg_Values[22876792454961] = 15 -Arg_Values[68630377364883] = 16 -Arg_Values[205891132094649] = 17 - -i = 0 -features = [] -labels = [] -with tqdm(total=len(list(df_taskset.iterrows()))) as pbar: - for index, row in df_taskset.iterrows(): - - try: - - i += 1 - grid = int(df_taskset.loc[index, 'Set_ID']) - res = int(df_taskset.loc[index, 'Successful']) - print(grid) - first_task = int(df_taskset.loc[index, 'TASK1_ID']) - second_task = int(df_taskset.loc[index, 'TASK2_ID']) - third_task = int(df_taskset.loc[index, 'TASK3_ID']) - fourth_task = int(df_taskset.loc[index, 'TASK4_ID']) - tasks = [] - - if first_task != -1: - - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period'] / 1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - tasks.append(int(task_info['Arg'])) - tasks.append(int(task_info['CRITICALTIME'] / 1000)) - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - if second_task != -1: - first_task = second_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period'] / 1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - tasks.append(int(task_info['Arg'])) - tasks.append(int(task_info['CRITICALTIME'] / 1000)) - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - 
tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - if third_task != -1: - first_task = third_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period'] / 1000)) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - tasks.append(int(task_info['Arg'])) - tasks.append( int ( task_info['CRITICALTIME'] / 1000)) - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - - if fourth_task != -1: - first_task = fourth_task - task_info = df_task.loc[df_task['Task_ID'] == first_task] - tasks.append(int(task_info['Priority'])) - tasks.append(int(task_info['Period'])) - tasks.append(int(task_info['Number_of_Jobs'])) - n = str(task_info['PKG'].item()) - tasks.append(PKGs[n]) - tasks.append(int(task_info['Arg'])) - tasks.append(int(task_info['CRITICALTIME'])) - job_info = df_job.loc[(df_job['Task_ID'] == first_task) & (df_job['Set_ID'] == grid)] - for ind, r in job_info.iterrows(): - tasks.append(Exit_Values[job_info.loc[ind, 'Exit_Value']]) - labels = np.array(int(df_taskset.loc[index, 'Successful'])) - - tasks = np.array(tasks) - features.append(tasks) - labels.append(res) - except Exception as e: - print(e) - pass - pbar.update(1) - -labels = np.array(labels) -features = pad_sequences(features, maxlen=42, value=-1, padding='post', truncating='post') - -model = load_model('My_LSTM_Model.h5') -X = np.expand_dims(features, axis=2) -preds = model.predict(X) - -arr = [] -for i in range(len(labels)): - l = labels[i] - p = np.argmax(preds[i]) - print ( "the actual value is{0}and the predicted value is {1}".format(l, p)) - arr.append([i + 1, l, p]) - -csvfile = "Predicion_results.csv" - -i = 0 -with open(csvfile, "w") as output: - writer = csv.writer(output, lineterminator='\n') - if (i == 0): - writer.writerow(["TaskSet ID", "Actual Value", "Predicted Value"]) - i += 1 - writer.writerows(arr) diff --git a/provision.sh b/provision.sh new file mode 100755 index 0000000..5b4f244 --- /dev/null +++ b/provision.sh @@ -0,0 +1,17 @@ +#!/bin/bash +####################### +# +# This is a provision script +# it will be called once when the vagrant vm is first provisioned +# If you have commands that you want to run always please have a +# look at the bootstrap.sh script +# +# Contributor: Bernhard Blieninger, Robert Hamsch +###################### + +sudo apt update -qq + +sudo apt install python3.5 python3-pip tmux -qq + +sudo apt install python3-venv +#pip3 install --user virtualenv diff --git a/CuDNNLSTM.py b/python3-lstm/CuDNNLSTM.py similarity index 83% rename from CuDNNLSTM.py rename to python3-lstm/CuDNNLSTM.py index 5fd9629..1a28871 100644 --- a/CuDNNLSTM.py +++ b/python3-lstm/CuDNNLSTM.py @@ -1,5 +1,10 @@ import pickle import time +import warnings +warnings.filterwarnings('ignore',category=FutureWarning) +#ignore deprecation warnings to get a better and cleaner output +from tensorflow.python.util import deprecation +deprecation._PRINT_DEPRECATION_WARNINGS = False import tensorflow as tf import numpy as np from keras.callbacks import TensorBoard @@ -10,6 +15,8 @@ from keras.optimizers import Adam from sklearn.model_selection import train_test_split + + name = "logname-{}".format ( int ( time.time () ) ) # both metrics and early stopping conditions are defined here and then saved in the log42 file @@ -18,9 +25,9 @@ es = EarlyStopping 
( monitor='val_loss', mode='min', verbose=1 ) # define early stopping criteria # Importing the the extracted features and labels -with open ( '56_features', 'rb' ) as fp: +with open ( '42_features', 'rb' ) as fp: X = pickle.load ( fp ) -with open ( '56_labels', 'rb' ) as fp: +with open ( '42_labels', 'rb' ) as fp: y = pickle.load ( fp ) # LSTM’s input shape argument expects a three-dimensional array as an input in this order: Samples, timestamps and features. This is why we need to add another dimention to the numpy array. @@ -39,15 +46,14 @@ # print ( count ) # devide data into training and test sets -X_train, X_test, y_train, y_test = train_test_split ( X, y, test_size=0.3 ) +X_train, X_test, y_train, y_test = train_test_split ( X, y, test_size=0.3 ,random_state=42) # print ( X_train.shape ) # LSTM input is fifty-six time-steps and one feature at each time-step is represented by the notation: (56,1). -input = Input ( shape=(56, 1) ) +input = Input ( shape=(42, 1) ) # the first LSTM layer has 64 cells, the number must be equal/bigger than the input size. If you are using a CPU then change CuDNNLSTM to LSTM -lstm = CuDNNLSTM ( 64, return_sequences=True ) ( - input ) # Return_sequences is set true because the first LSTM has to return a sequence, which then can be fed into the 2nd LSTM +lstm = CuDNNLSTM ( 64, return_sequences=True ) ( input ) # Return_sequences is set true because the first LSTM has to return a sequence, which then can be fed into the 2nd LSTM lstm = CuDNNLSTM ( 128, return_sequences=True ) ( lstm ) lstm = CuDNNLSTM ( 256 ) ( lstm ) diff --git a/python3-lstm/Data_preparation.py b/python3-lstm/Data_preparation.py new file mode 100644 index 0000000..0f8bbad --- /dev/null +++ b/python3-lstm/Data_preparation.py @@ -0,0 +1,176 @@ + +import warnings +warnings.filterwarnings('ignore',category=FutureWarning) +import pickle +import sys +import numpy as np +from keras.preprocessing.sequence import pad_sequences +import sqlite3 + +debug = False + + +# PKG has a fixed set of labels. 
Integer encoding is used where integer # value is assigned to each label +PKGs = { + 'pi' : 0, + 'hey' : 1, + 'tumatmul' : 2, + 'cond_mod' : 3 + } + +# Integer encoding for Exit_Values from Jobs +Exit_Values = { + 'EXIT' : 1, + 'EXIT_CRITICAL' : 0, + 'EXIT_PERIOD' : 2, + 'OUT_OF_CAPS' : 3, + 'OUT_OF_QUOTA' : 4, + 'EXIT_ERROR' : 5 + } + +# ARG values ranged from 1 to 205.891.132.094.649, these values were normalized and scaled # to range from 1 to 17 +Arg_Values = { + 1 : 1, + 4096 : 2, + 8192 : 3, + 16384 : 4, + 32768 : 5, + 65536 : 6, + 131072 : 7, + 262144 : 8, + 524288 : 9, + 1048576 : 10, + 2097152 : 11, + 847288609443 : 12, + 2541865828329 : 13, + 7625597484987 : 14, + 22876792454961 : 15, + 68630377364883 : 16, + 205891132094649 : 17 + } + + +if debug: + print("Doing writing") + +DB_PATH = sys.argv[1] +TASKS_DICT = {} + +def taskToFeatureList(task): + #returns a fature list for the corresponding task values + feature = [] + feature.append(task['Priority']) + feature.append(task['Period']) + feature.append(task['Number_of_Jobs']) + feature.append(task['PKG']) + feature.append(task['Arg']) + feature.append(task['CRITICALTIME']) + return feature + + +def getTaskFeatures(db_path): #c is the cursor for the db + # returns a dictionary + # { task_id : [ feature, list ] + conn = sqlite3.connect(db_path) + conn.row_factory = lambda C, R: { c[0]: R[i] for i, c in enumerate(C.description) } + db_cursor = conn.cursor() + db_cursor.execute('select Task_ID,Priority,Period,PKG,Arg,CRITICALTIME,Number_of_Jobs from Task') + outputTable = db_cursor.fetchall() + + tasks_dict = {} + for row in outputTable: + row['Period'] = int(row['Period']/1000) + row['Number_of_Jobs'] = int(row['Number_of_Jobs']) + row['PKG'] = PKGs[row['PKG']] + row['CRITICALTIME'] = int(row['CRITICALTIME']/1000) + row['Arg'] = Arg_Values[row['Arg']] + tasks_dict[row['Task_ID']] = taskToFeatureList(row) + return tasks_dict + + +def processTaskset(tasksetData): + # tasksetData is a list of tuples returned from the DB in getTasksetData() + label = tasksetData[0][-1] + features = [] + jobExitsByTask = {} + for tsData in tasksetData: + try: + jobExitsByTask[tsData[4]].append(Exit_Values[tsData[5]]) + except KeyError: + jobExitsByTask[tsData[4]] = [Exit_Values[tsData[5]]] + for taskIdNo in (1,2,3): + if tasksetData[0][taskIdNo] != -1: + features += TASKS_DICT[tasksetData[0][taskIdNo]] + try: + features += jobExitsByTask[tasksetData[0][taskIdNo]] + except KeyError: + features += [Exit_Values['EXIT_ERROR']] + return np.array(features), label + + +def getFeaturesLabels(db_path): + conn = sqlite3.connect(db_path) + db_cursor = conn.cursor() + command = 'SELECT TaskSet.Set_ID, TaskSet.TASK1_ID, TaskSet.TASK2_ID, TaskSet.TASK3_ID, Job.Task_ID, Job.Exit_Value, TaskSet.Successful'\ + ' FROM TaskSet JOIN Job'\ + ' ON TaskSet.Set_ID = Job.Set_ID and'\ + ' (TaskSet.TASK1_ID == Job.Task_ID or'\ + ' TaskSet.TASK2_ID == Job.Task_ID or'\ + ' TaskSet.TASK3_ID == Job.Task_ID);' + db_cursor.execute(command) + # data_table format: [( TaskSet.Set_ID, TaskSet.TASK1_ID, TaskSet.TASK2_ID, TaskSet.TASK3_ID, Job.Task_ID, Job.Exit_Value, TaskSet.Successful)] + data_table = db_cursor.fetchall() + + finalFeatureList = [] + finalLabelList = [] + currentTset = data_table[0][0] # first taskset id + tSetJobs = [] + totalSize = len(data_table) + for row in data_table: + if row[0] == currentTset: + #then still same setTset + tSetJobs.append(row) + else: + # job of next taskset + # process data and record new + features, label = processTaskset(tSetJobs) + 
finalFeatureList.append(features) + finalLabelList.append(label) + tSetJobs = [] + currentTset = row[0] + tSetJobs.append(row) + # process last taskset + features, label = processTaskset(tSetJobs) + finalFeatureList.append(features) + finalLabelList.append(label) + return finalFeatureList, finalLabelList + + + +TASKS_DICT = getTaskFeatures(DB_PATH) + +if debug: + print('Tasks have been added to TASKS_DICT') + print('length of taskdict: ', len(TASKS_DICT)) + print('example task 222:',TASKS_DICT[222]) + +features, labels = getFeaturesLabels(DB_PATH) + +labels = np.array(labels) # to save the labels list as numpy array + +# To make a fixed-length vector: if the vector is shorter than 42, pad the missing values with -1; if longer than 42, truncate it +features = pad_sequences(features, maxlen=42, value=-1, padding='post', truncating='post') + +if debug: + print(features.shape) # the dimensionality of features + input() + print(labels.shape) # the dimensionality of labels + input() + +# save both files for the training +with open ( '42_features', 'wb' ) as outfile: # 'wb' is the file mode, it means 'write binary' + pickle.dump(features, outfile) + +with open ( '42_labels', 'wb' ) as outfile: + pickle.dump(labels, outfile) + diff --git a/Evaluation.py b/python3-lstm/Evaluation.py similarity index 88% rename from Evaluation.py rename to python3-lstm/Evaluation.py index ffd9811..1ffccc8 100644 --- a/Evaluation.py +++ b/python3-lstm/Evaluation.py @@ -12,9 +12,9 @@ from sklearn import metrics from sklearn.model_selection import train_test_split -with open ( '56_features', 'rb' ) as fp: +with open ( '42_features', 'rb' ) as fp: X = pickle.load ( fp ) -with open ( '56_labels', 'rb' ) as fp: +with open ( '42_labels', 'rb' ) as fp: y = pickle.load ( fp ) X = np.expand_dims ( X, axis=2 ) @@ -30,7 +30,7 @@ y = np.array ( newy ) print ( count ) -X_train, X_test, y_train, y_test = train_test_split ( X, y, test_size=0.3 ) +X_train, X_test, y_train, y_test = train_test_split ( X, y, test_size=0.3, random_state=42) print ( X_train.shape ) model = load_model ( 'My_LSTM_Model.h5' ) # loading saved model @@ -58,8 +58,8 @@ plt.figure ( figsize=(5.5, 4) ) sns.heatmap ( cm_df, annot=True, fmt='g' ) -plt.title ( 'Confusoin Matrix \n Accuracy:{0:.3f}'.format ( accuracy_score ( yt, yp ) ) ) +plt.title ( 'Confusion Matrix \n Accuracy:{0:.3f}'.format ( accuracy_score ( yt, yp ) ) ) plt.ylabel ( 'True label' ) plt.xlabel ( 'Predicted label' ) plt.show () -plt.savefig ( 'Confusoin_Matrix.png' ) +plt.savefig ( 'Confusion_Matrix.png' ) diff --git a/Plotting.py b/python3-lstm/Plotting.py similarity index 100% rename from Plotting.py rename to python3-lstm/Plotting.py diff --git a/parallel_search.py b/python3-lstm/parallel_search.py similarity index 100% rename from parallel_search.py rename to python3-lstm/parallel_search.py diff --git a/python3-lstm/prediction.py b/python3-lstm/prediction.py new file mode 100644 index 0000000..8dc2274 --- /dev/null +++ b/python3-lstm/prediction.py @@ -0,0 +1,31 @@ +import numpy as np +import pickle +from keras.models import load_model +import csv + +with open ( '42_features', 'rb' ) as outfile: # 'rb' is the file mode, it means 'read binary' + features = pickle.load(outfile, fix_imports=True) + +with open ( '42_labels', 'rb' ) as outfile: + labels = pickle.load(outfile, fix_imports=True) + +model = load_model('My_LSTM_Model.h5') +X = np.expand_dims(features, axis=2) +preds = model.predict(X) + +arr = [] +for i in range(len(labels)): + l = labels[i] + p = np.argmax(preds[i]) + print 
( "the actual value is {0} and the predicted value is {1}".format(l, p)) + arr.append([i + 1, l, p]) + +csvfile = "Prediction_results.csv" + +i = 0 +with open(csvfile, "w") as output: + writer = csv.writer(output, lineterminator='\n') + if (i == 0): + writer.writerow(["TaskSet ID", "Actual Value", "Predicted Value"]) + i += 1 + writer.writerows(arr) diff --git a/python3-lstm/requirements.txt b/python3-lstm/requirements.txt new file mode 100644 index 0000000..99bb79b --- /dev/null +++ b/python3-lstm/requirements.txt @@ -0,0 +1,8 @@ +keras==2.2.5 +matplotlib==3.1.1 +numpy==1.17.2 +pandas==0.25.1 +seaborn==0.9.0 +scikit_learn==0.21.3 +tensorboard==1.14.0 +tensorflow==1.14.0
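Usage note: the reworked python3-lstm/Data_preparation.py no longer reads the exported TaskSet.csv/Task.csv/Job.csv tables; it queries the SQLite database directly, taking the database path as its first command-line argument (python3 Data_preparation.py <path-to-database>), and pickles the results as 42_features and 42_labels. CuDNNLSTM.py, Evaluation.py and prediction.py expect those two files in their working directory. The snippet below is only an illustrative sanity-check sketch of that pickled output, assuming the files sit next to the scripts; the fixed vector length of 42 comes from the pad_sequences(maxlen=42, ...) call in Data_preparation.py.

    import pickle
    import numpy as np

    # load the padded feature matrix and labels written by Data_preparation.py
    with open('42_features', 'rb') as fp:
        X = pickle.load(fp)
    with open('42_labels', 'rb') as fp:
        y = pickle.load(fp)

    # every task-set should be a fixed-length vector of 42 values, padded with -1
    assert X.shape == (len(y), 42)

    # CuDNNLSTM.py and Evaluation.py feed the model one feature per time-step,
    # so the network input ends up shaped (samples, 42, 1)
    X = np.expand_dims(X, axis=2)
    print(X.shape, y.shape)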