<a href="https://colab.research.google.com/github/rfahrn/Shared_Task/blob/main/SentimentAliSalhi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
# If there's a GPU available...
if torch.cuda.is_available():    
    # Tell PyTorch to use the GPU.  

    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
    !nvidia-smi
    
# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB
Sat Jul 10 22:31:26 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    33W / 250W |   5503MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------

In [None]:
# install needed libraries
!pip install pyarabic
!pip install emoji
!pip install pystemmer
!pip install optuna==2.3.0
!pip install transformers==4.2.1



In [None]:
# import needed libraries
import numpy as np
import pandas as pd
import pyarabic.araby as ar

import re , emoji, Stemmer, functools, operator, string
import torch , optuna, gc, random, os

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix, precision_score , recall_score
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer, BertTokenizer
from transformers.data.processors import SingleSentenceClassificationProcessor
from transformers import Trainer , TrainingArguments
from transformers.trainer_utils import EvaluationStrategy
from transformers.data.processors.utils import InputFeatures
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.utils import resample

import logging

# Module-level logger for this notebook; the root logger is configured to
# report WARNING and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)

In [None]:
# Simple container describing one corpus: its name, its two splits and its labels.
# NOTE: defining this class rebinds the name `Dataset`, which the import cell
# earlier bound to torch.utils.data.Dataset.
class Dataset:
    """Bundle a named dataset with its train/test splits and label list.

    Attributes:
        name: identifier of the dataset.
        train: the training split, stored as given.
        test: the held-out split, stored as given.
        label_list: the labels that occur in the data.
    """

    def __init__(self, name, train, test, label_list):
        # Store the arguments unchanged; no copying or validation is performed.
        self.name, self.train = name, train
        self.test, self.label_list = test, label_list

In [None]:
# 
class BERTModelDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per item for a BERT classifier.

    The base class is spelled out as ``torch.utils.data.Dataset`` because the
    bare name ``Dataset`` is rebound by the corpus-container class defined
    earlier in this notebook, so inheriting from ``Dataset`` would pick up that
    container instead of the PyTorch dataset type.

    Args:
        text: indexable collection of input texts.
        target: indexable collection of labels, aligned with ``text``.
        model_name: checkpoint name passed to ``AutoTokenizer.from_pretrained``.
        max_len: length every encoded sequence is padded / truncated to.
        label_map: mapping from each label in ``target`` to its integer id.
    """

    def __init__(self, text, target, model_name, max_len, label_map):
        super().__init__()
        self.text = text
        self.target = target
        self.tokenizer_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.max_len = max_len
        self.label_map = label_map

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.text)

    def __getitem__(self, item):
        """Encode the text at position ``item`` and return it as ``InputFeatures``."""
        # Collapse every run of whitespace (including newlines) to one space.
        text = " ".join(str(self.text[item]).split())

        encoded_review = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            add_special_tokens=True,
            return_token_type_ids=False,
            # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
            padding='max_length',
            truncation='longest_first',
            return_attention_mask=True,
            return_tensors='pt',
        )

        # Tensors are deliberately left on the CPU: the training loop moves each
        # collated batch to the training device, and per-item device transfers
        # inside a dataset prevent pinned memory and worker processes.
        return InputFeatures(
            input_ids=encoded_review['input_ids'].flatten(),
            attention_mask=encoded_review['attention_mask'].flatten(),
            label=self.label_map[self.target[item]],
        )

In [None]:
# Tweets cleaning function
st = Stemmer.Stemmer('arabic')
# Translation table that deletes every ASCII punctuation character; built once
# here instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def data_cleaning(text):
    """Normalize one tweet and append a stemmed copy of it.

    Steps, in order:
      1. remove URLs (every token starting at ``http``) and collapse whitespace;
      2. remove digits;
      3. strip Arabic diacritics and tatweel via ``pyarabic``;
      4. replace ``#``, ``@`` and ``_`` with spaces and delete ASCII punctuation;
      5. put spaces around emoji so each one becomes its own token;
      6. collapse any run of a repeated character to a single character;
      7. append the stemmed form of every token to the text;
      8. map the hamza-carrying letters to their bare forms.

    Args:
        text: the raw tweet. ``None`` and NaN are treated as missing and give
            an empty string; any other non-string value is converted with
            ``str``.

    Returns:
        The cleaned text followed by a space and its stemmed version.
    """
    if not isinstance(text, str):
        # Missing values would otherwise make the first re.sub raise TypeError.
        if text is None or (isinstance(text, float) and text != text):
            return ""
        text = str(text)

    # Remove only the URL itself. The earlier line-anchored patterns
    # (`^https?://.*`) deleted everything up to the end of the line, so a tweet
    # that began with a link lost all of its text.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r'\s+', ' ', text)

    # Digits. The middle pattern is kept character-for-character; its first
    # alternative begins with `$` and therefore appears unable to match.
    text = re.sub(r"(\s\d+)", "", text)
    text = re.sub(r"$\d+\W+|\b\d+\b|\W+\d+$", "", text)
    text = re.sub(r"\d+", " ", text)

    text = ar.strip_tashkeel(text)
    text = ar.strip_tatweel(text)

    # These three are turned into spaces (not deleted) so that the words they
    # join stay separate once punctuation is removed.
    for marker in ("#", "@", "_"):
        text = text.replace(marker, " ")
    text = text.translate(_PUNCTUATION_TABLE)

    # Split on emoji (the emoji themselves are kept as separate pieces), then on
    # whitespace, and re-join with single spaces.
    # NOTE: emoji.get_emoji_regexp() requires an emoji 1.x release.
    pieces = emoji.get_emoji_regexp().split(text)
    tokens = [token for piece in pieces for token in piece.split()]
    text = " ".join(tokens)

    # Collapse elongated characters; this also shortens genuine double letters.
    text = re.sub(r'(.)\1+', r'\1', text)

    # The stemmed tokens are appended to, not substituted for, the surface text.
    text_stem = " ".join([st.stemWord(token) for token in text.split()])
    text = text + " " + text_stem

    # Letter normalization.
    for variant, base in (("آ", "ا"), ("إ", "ا"), ("أ", "ا"), ("ؤ", "و"), ("ئ", "ي")):
        text = text.replace(variant, base)

    return text

In [None]:
def model_init():
    """Load a fresh sequence-classification model from the `Model_Used` checkpoint.

    The number of output labels equals the number of entries in the
    notebook-level `label_map`.
    """
    num_classes = len(label_map)
    return AutoModelForSequenceClassification.from_pretrained(
        Model_Used,
        return_dict=True,
        num_labels=num_classes,
    )

def compute_metrics(p): #p should be of type EvalPrediction
  """Compute evaluation metrics from model predictions.

  Args:
    p: object exposing `predictions` (per-class scores, one row per sample)
       and `label_ids` (the true class ids).

  Returns:
    Dict with macro F1, macro F1 restricted to the negative/positive classes,
    macro precision, macro recall and accuracy. A classification report is
    printed as a side effect.
  """
  preds = np.argmax(p.predictions, axis=1)
  assert len(preds) == len(p.label_ids)
  print(classification_report(p.label_ids,preds))

  # Class ids are assigned in order of first appearance in the training split,
  # so "negative"/"positive" are not guaranteed to be ids 1 and 2. Resolve them
  # from the notebook-level label_map when it exists; [1, 2] is only a fallback.
  known_labels = globals().get('label_map') or {}
  pos_neg_ids = [known_labels[name] for name in ('negative', 'positive') if name in known_labels]
  if not pos_neg_ids:
    pos_neg_ids = [1, 2]

  macro_f1_pos_neg = f1_score(p.label_ids,preds,average='macro',labels=pos_neg_ids)
  macro_f1 = f1_score(p.label_ids,preds,average='macro')
  macro_precision = precision_score(p.label_ids,preds,average='macro')
  macro_recall = recall_score(p.label_ids,preds,average='macro')
  acc = accuracy_score(p.label_ids,preds)
  return {
      'macro_f1' : macro_f1,
      'macro_f1_pos_neg' : macro_f1_pos_neg,
      'macro_precision': macro_precision,
      'macro_recall': macro_recall,
      'accuracy': acc
  }

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Covers Python's hash seed variable, the `random` module, NumPy and PyTorch
    (CPU and all CUDA devices), and sets the cuDNN deterministic / benchmark
    flags.
    """
    # Python-level sources of randomness.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: request deterministic kernels and switch off auto-tuning.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


In [None]:
# Notebook configuration: file names, column names and run-level settings.

# --- training data -----------------------------------------------------------
Train_Data_File = "train_all.csv"
Train_Data_Extended_File = "train_all_ext.csv"
Use_Train_Extended_Data = False   # True -> read the extended file instead
Tweets_Ids_Col_Train = "Tweet_id"
Tweets_Text_Col_Train = "Text"
Tweets_Sentiment_Col_Train = "sentiment"

# --- test data (the file predictions are produced for) -----------------------
Test_Data_File = "test1_with_text.csv"
Tweets_Ids_Col_Test = "Tweet_id"
Tweets_Text_Col_Test = "Text"

# --- experiment settings -----------------------------------------------------
Model_Used = "UBC-NLP/MARBERT"
Task_Name = "classification"
Rand_Seed = 42
Test_Size = 0.001                 # fraction of the training data held out for evaluation

# --- sequence length ---------------------------------------------------------
Max_Len = 0                       # placeholder; recomputed after data cleaning
Extra_Len = 6                     # margin added on top of the longest cleaned text

# --- output ------------------------------------------------------------------
Output_File = "sub_filewwww.csv"

In [None]:
# Training arguments: start from the library defaults (output directory
# "./train") and override the fields tuned for this run.
training_args = TrainingArguments("./train")

# Optimizer and learning-rate schedule.
training_args.learning_rate = 1.215e-05  # best score 5-6-2021
training_args.adam_epsilon = 1e-8
training_args.lr_scheduler_type = 'cosine'
training_args.warmup_steps = 0

# Batching, epochs and precision.
training_args.per_device_train_batch_size = 16
training_args.per_device_eval_batch_size = 16
training_args.gradient_accumulation_steps = 2
training_args.num_train_epochs = 2
training_args.fp16 = True

# Evaluation: run once per epoch.
training_args.evaluate_during_training = True
training_args.evaluation_strategy = EvaluationStrategy.EPOCH

# Logging and checkpointing.
training_args.logging_steps = 200
training_args.save_steps = 100000  # very large interval: no intermediate model should be saved
training_args.disable_tqdm = False

# Reproducibility.
training_args.seed = Rand_Seed

In [None]:
# Read the training CSV; the extended file is used only when the config flag is set.
train_source_file = Train_Data_Extended_File if Use_Train_Extended_Data else Train_Data_File
train_data = pd.read_csv(train_source_file, sep=",")

# Class balance first, then the count of every distinct row.
print(train_data[Tweets_Sentiment_Col_Train].value_counts())
print(train_data.value_counts())

neutral     37359
positive     8821
negative     8820
Name: sentiment, dtype: int64
Tweet_id             sentiment  Text                                                                                                                           
1255515197252472833  neutral    @stc ماهذا بمنطق نتصل للدعم الفني فتواجهنا رساله رفض و طرد وكأننا نتسول الخدمة (نعتذر عن خدمتك حالياً ) انا احتجتكم حالياً #stc    1
1221153202533556225  neutral    نصيحه لحد يطيح بالفخ نفس ماطحت فيه أنا  نقدم شكوى ويقولون انطر يومين او ثلاث عشان نرد نتصل عليك https://t.co/fB7b1RtfA5            1
1221152551531315201  neutral    @Saudi_Airlines عندي رحلة صباحاً داخلية إقلاع الساعة ١٠:٣٠ مطار الملك خالد هل استطيع الذهاب للمطار الان وشحن العفش                 1
1221152090480881664  neutral    مش كلامك الكتير اللي هيجيب حقك ممكن نعتبرها زوبعة في فنجان                                                                         1
1221151148868063233  negative   @stccare_ksa اعيدوا لنا ماسلبتموه منا بالغش والظام خافوا الله ال

In [None]:
# Clean every training tweet (the text column is overwritten), then preview
# the first 50 cleaned texts.
train_data[Tweets_Text_Col_Train] = train_data[Tweets_Text_Col_Train].apply(data_cleaning)
train_data[Tweets_Text_Col_Train].head(50)

0     الزعل بيغير ملامحك بيغير نظرة العين بيغير شكلك...
1     halgawi DmfMohe ليس حبا في ايران بقدر ماهو نكا...
2     adalfahaduwail ابي اعرف الحاكم العربي المسلم ا...
3     sarmadbouchamou DimaSadek في الخطاب تبع سليم س...
4     FofaMahmoud مفيش الكلام ده في الزمن FofaMahmou...
5     elm عندي مشكله لما تبي اجد اقامة عامل حاولت ات...
6     الرياسة الفلسطينية تدعو السفراء العرب والمسلمي...
7     Hamed Alali التجنيس يضر اهل السنه في سوريا تغي...
8        سوف تبحث عني في شخص اخر سوف تبحث عن في شخص اخر
9     Bethoven كل ما املكه في تلك الحياة هو كرامتي و...
10    ان كان لك نصيب في شيء سيقلب اله كل الموازين لك...
11    الموكد ان هذا الوزيز هو معمر العرياني والموكد ...
12    الاتحادالافريقي لكرة القدم caf fifa دى جماهير ...
13    mounirhafi في واحد كان معبى الكراسي وزراء وطلع...
14    لاغاب نور القمر في عتمة اليل ينشاف نور البدر ف...
15    عطيتك قلب م عطيته لاحد غيرك غلطه و في ذمتي م ك...
16    الغريب في الامر انك مازلت تنتظر رغم كل هذه الخ...
17    almobark عقبال القمة دنيا باطما تاجر فبات 

In [None]:
# Drop the tweet-id column when it is present; the model does not use it.
train_data = train_data.drop(columns=[Tweets_Ids_Col_Train], errors="ignore")

# The remaining two columns are renamed positionally: sentiment first, text second.
train_data.columns = [Tweets_Sentiment_Col_Train, Tweets_Text_Col_Train]

# Longest cleaned text, measured in whitespace-separated words, plus a margin.
# NOTE(review): this value is later used as the tokenizer's max_length, which
# presumably counts sub-word tokens rather than words - confirm the margin is enough.
words_per_tweet = train_data[Tweets_Text_Col_Train].str.split().str.len()
Max_Len = words_per_tweet.max() + Extra_Len

print(Max_Len)

124


In [None]:
# Hold out a fraction (Test_Size) of the training data as the evaluation split.
train_set, evaluation_set = train_test_split(
    train_data,
    test_size=Test_Size,
    random_state=Rand_Seed,
)

# Report the class balance of each split.
train_label_counts = train_set[Tweets_Sentiment_Col_Train].value_counts()
print("Train set: ")
print(train_label_counts)
print("---------------------------")
evaluation_label_counts = evaluation_set[Tweets_Sentiment_Col_Train].value_counts()
print ("Evaluation set: ")
print(evaluation_label_counts)


Train set: 
neutral     37321
positive     8817
negative     8807
Name: sentiment, dtype: int64
---------------------------
Evaluation set: 
neutral     38
negative    13
positive     4
Name: sentiment, dtype: int64


In [None]:
# Load the test tweets (the file predictions are submitted for), name its two
# columns positionally as id and text, and clean the text the same way as the
# training data.
test_data = pd.read_csv(Test_Data_File, sep=",")
test_data.columns = [Tweets_Ids_Col_Test, Tweets_Text_Col_Test]

test_data[Tweets_Text_Col_Test] = test_data[Tweets_Text_Col_Test].apply(data_cleaning)

# Preview the first 50 cleaned test tweets.
test_data[Tweets_Text_Col_Test].head(50)


0     kamnap bip ksa nazaha gov sa MCgovSA شوفوا هال...
1     ربما الموت يقترب مني وانا لا اشعر به لطفك يا ا...
2     mhrsd care السلام عليكم لوسمحت تحديث ملف المنش...
3     ”لقد نام تاركا حلمه في ودايع اله فايقظه اله عل...
4     nahdihope طالبه طلبيه منكم من امس ولحد الان ما...
5     يناير يوم جميل جدا اجمل من شعور لمت الصحبات في...
6     🔴 مﻧ الخاص 📩 🖋 متى نتايج التحويل لجامعة ام الق...
7     مين من الاعلامين الاهلاوين بيظهرو اليوم في اي ...
8     يا اله شو هالمناعة الحلوة الي عندي على ابسط سف...
9     نشكر الشرطة المصرية لحماية البشرية من فيروس كو...
10    اح وربي طول المقطع وقلبي واقف خايف يصيرلهم شي ...
11    mhrsd care السلام عليكم اخي الكريم اريد ان اعر...
12    من اروع و اجمل الفعاليات لي حضرتها حقت موسم جد...
13    مستنيه ان ربنا ينجحني انا ولا نسان الي بحبه ون...
14    MCgovSA البلدية طلبوا مني تحديث بيانات السجل ا...
15    الهم استجب لنا ما نعجز عن قوله، ما يصعب علينا ...
16    eMoror السلام عليكم لو سمحت كيف اسقط ملكيه مرك...
17    بنسحب ياعرب ربي الحلا ورق العنب والشاورما 

In [None]:
# Prepare the inputs for fine-tuning.
sentiment_column = train_set[Tweets_Sentiment_Col_Train]

# Labels in order of first appearance in the training split; a label's position
# in this list is its integer class id.
label_list = list(sentiment_column.unique())

print(label_list)
print(sentiment_column.value_counts())

data_set = Dataset("KAUST", train_set, evaluation_set, label_list)

label_map = dict(zip(label_list, range(len(label_list))))
print(label_map)

# Tokenizing datasets for the training and evaluation splits.
train_dataset = BERTModelDataset(
    train_set[Tweets_Text_Col_Train].to_list(),
    sentiment_column.to_list(),
    Model_Used,
    Max_Len,
    label_map,
)

evaluation_dataset = BERTModelDataset(
    evaluation_set[Tweets_Text_Col_Train].to_list(),
    evaluation_set[Tweets_Sentiment_Col_Train].to_list(),
    Model_Used,
    Max_Len,
    label_map,
)



['neutral', 'negative', 'positive']
neutral     37321
positive     8817
negative     8807
Name: sentiment, dtype: int64
{'neutral': 0, 'negative': 1, 'positive': 2}


In [None]:
# Inputs for the TF-IDF + linear SVM baseline: texts and labels as plain lists,
# with a very small validation slice split off.
x = train_set[Tweets_Text_Col_Train].tolist()
y = train_set[Tweets_Sentiment_Col_Train].tolist()
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.0005,
    random_state=42,
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Character n-gram TF-IDF features (n = 1..8) for the baseline classifier.
# The vocabulary is learned from the training texts only and then applied
# unchanged to the validation texts.
vectorizer = TfidfVectorizer(ngram_range=(1,8),  analyzer='char' ,  max_df=0.999999999, min_df=1, sublinear_tf=True, use_idf=True, norm='l2' )
train_vectors = vectorizer.fit_transform(x_train)
val_vectors = vectorizer.transform(x_val)
# The first label previously read "Shape o# training data".
print("Shape of training data : ", train_vectors.shape, "\nShape of validation data : ", val_vectors.shape)


Shape o# training data :  (54917, 8265199) 
Shape of validation data :  (28, 8265199)


In [None]:
from sklearn.svm import LinearSVC

# Linear SVM baseline with class weights balanced against the label skew.
# The variable name `model_LR` is kept because later cells refer to it.
model_LR = LinearSVC(
    max_iter=500,
    class_weight='balanced',
    random_state=42,
)

model_LR.fit(train_vectors, y_train)


LinearSVC(C=1.0, class_weight='balanced', dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=500,
          multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
          verbose=0)

In [None]:
def performance(model, y_true, vectors):
    """Predict with `model` on `vectors` and score the result against `y_true`.

    Prints the percentage share of each predicted label, then returns the tuple
    ``(confusion matrix, classification report text, accuracy)``.
    """
    print("Predicting the sentiments...")
    predictions = model.predict(vectors)

    # Pair truth and prediction; only the predicted column is summarized below.
    comparison = pd.DataFrame({'actual': y_true, 'predicted': predictions})
    print("\nAnalysis after prediction : \n")
    predicted_share = comparison['predicted'].value_counts(normalize=True) * 100
    print(predicted_share)

    return (
        confusion_matrix(y_true, predictions),
        classification_report(y_true, predictions, digits=3),
        accuracy_score(y_true, predictions),
    )

In [None]:
# Score the baseline on the small validation slice and print each result.
perform = performance(model_LR, y_val, val_vectors)
baseline_cm, baseline_report, baseline_accuracy = perform

print("Confusion Matrix :\n", baseline_cm)
print("classification report: \n", baseline_report)
print("Accuracy score  = ", baseline_accuracy * 100)
print("-" * 100)

Predicting the sentiments...

Analysis after prediction : 

neutral     78.571429
negative    10.714286
positive    10.714286
Name: predicted, dtype: float64
Confusion Matrix :
 [[ 2  1  0]
 [ 1 20  2]
 [ 0  1  1]]
classification report: 
               precision    recall  f1-score   support

    negative      0.667     0.667     0.667         3
     neutral      0.909     0.870     0.889        23
    positive      0.333     0.500     0.400         2

    accuracy                          0.821        28
   macro avg      0.636     0.679     0.652        28
weighted avg      0.842     0.821     0.830        28

Accuracy score  =  82.14285714285714
----------------------------------------------------------------------------------------------------


In [None]:
# Vectorize the cleaned test tweets with the TF-IDF vocabulary fitted on the
# training texts (transform only, no re-fitting).
test_vectors = vectorizer.transform(test_data[Tweets_Text_Col_Test])

In [None]:
# Baseline predictions for the test tweets, written out as a submission file.
predicted = model_LR.predict(test_vectors)
print("Results : \n")

tweet_id_strings = test_data[Tweets_Ids_Col_Test].astype(str)
results = pd.DataFrame(
    {'Tweet_id' : tweet_id_strings, 'sentiment' : predicted},
    columns = ['Tweet_id', 'sentiment'],
)

# Convert the label names to the numeric codes used in the output file.
sentiment_codes = {'positive':1, 'negative':-1, 'neutral':0}
results['sentiment'] = results['sentiment'].map(sentiment_codes)
results.to_csv("cc_small.csv", sep= ",", index = False)

Results : 



In [None]:
# Pinned host memory is switched off before the Trainer is built.
# NOTE(review): presumably required because BERTModelDataset hands out tensors
# that are already on `device` - confirm before re-enabling.
training_args.dataloader_pin_memory = False

# Free Python garbage and cached CUDA memory left over from earlier cells.
gc.collect()
torch.cuda.empty_cache()

# Seed all generators before the model is instantiated.
set_seed(Rand_Seed)

trainer = Trainer(
    model=model_init(),
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=evaluation_dataset,
    compute_metrics=compute_metrics,
)

print(training_args.seed)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=654186400.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at U

42


In [None]:
# Echo the settings that characterize this run, one per line, then fine-tune.
for run_setting in (
    Max_Len,
    training_args.learning_rate,
    training_args.adam_epsilon,
    training_args.warmup_steps,
):
    print(run_setting)

trainer.train()

142
1.215e-05
1e-08
0



The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).



Epoch,Training Loss,Validation Loss,Macro F1,Macro F1 Pos Neg,Macro Precision,Macro Recall,Accuracy,Runtime,Samples Per Second
0,0.295,0.285726,0.882105,0.823158,0.90538,0.86646,0.905405,0.4436,166.822
1,0.1891,0.286484,0.888491,0.832736,0.894708,0.883023,0.905405,0.4114,179.873



Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.



              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.90      0.96      0.93        46
           2       0.82      0.64      0.72        14

    accuracy                           0.91        74
   macro avg       0.91      0.87      0.88        74
weighted avg       0.90      0.91      0.90        74

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.91      0.93      0.92        46
           2       0.77      0.71      0.74        14

    accuracy                           0.91        74
   macro avg       0.89      0.88      0.89        74
weighted avg       0.90      0.91      0.90        74



TrainOutput(global_step=4596, training_loss=0.26563772229758215, metrics={'train_runtime': 2301.5572, 'train_samples_per_second': 1.997, 'total_flos': 20405734677933552, 'epoch': 2.0})

In [None]:
# Progress marker: reaching this cell means the training cell has finished.
print("training done")

training done


In [None]:
def predict(text, tokenizer):
  """Classify one text with the model held by the notebook-level `trainer`.

  Args:
    text: the text to classify.
    tokenizer: tokenizer exposing `encode_plus`; the text is padded / truncated
      to the notebook-level `Max_Len`.

  Returns:
    A 0-dim tensor holding the index of the highest-scoring class.
  """
  encoded_review = tokenizer.encode_plus(
    text,
    max_length=Max_Len,
    add_special_tokens=True,
    return_token_type_ids=False,
    # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
    padding='max_length',
    truncation='longest_first',
    return_attention_mask=True,
    return_tensors='pt'
  )

  input_ids = encoded_review['input_ids'].to(device)
  attention_mask = encoded_review['attention_mask'].to(device)

  model = trainer.model
  # Inference only: make sure dropout is off and no autograd graph is built,
  # whatever mode the training loop left the model in.
  model.eval()
  with torch.no_grad():
    output = model(input_ids=input_ids, attention_mask=attention_mask)

  # output[0] holds the per-class scores for the single input row.
  _, prediction = torch.max(output[0], dim=1)
  return prediction[0]
  

In [None]:
# Predict a sentiment code for every test tweet.
tokenizer = AutoTokenizer.from_pretrained(Model_Used)

# Numeric codes written to the output file for each label name.
sentiment_codes = {'positive': 1, 'negative': -1, 'neutral': 0}

prediction_list = []
# Iterate over the texts directly: the previous per-row id lookup was unused,
# shadowed the builtin `id`, and indexed by label with a hand-kept counter,
# which fails when the frame's index is not 0..n-1.
for tweet in test_data[Tweets_Text_Col_Test]:
    pre = predict(tweet, tokenizer)
    pre_txt = label_list[pre]
    # A label without a numeric code is appended unchanged.
    prediction_list.append(sentiment_codes.get(pre_txt, pre_txt))
    



The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).



In [None]:
# Show the raw predicted codes, then pair them with the tweet ids.
print(prediction_list)

tweet_ids = test_data[Tweets_Ids_Col_Test].astype(str)
results = pd.DataFrame(
    {'Tweet_id' : tweet_ids, 'sentiment' : prediction_list},
    columns = ['Tweet_id', 'sentiment'],
)

print(results)


[0, 0, 0, 0, 0, 1, 0, 0, -1, 1, -1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0, 0, 0, -1, 1, 1, -1, 0, 1, 1, 0, 0, 0, 0, -1, 0, 1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, -1, 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, -1, -1, 1, 0, 1, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, -1, 0, 0, 0, 0, 1, 0, -1, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, 0, 0, 0, -1, 0, 1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, -1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -1, 1, 1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 0, 0, -1, 1, 0, 0, 0, 1, 0, -1, 1, -1, 0, 0, 0, -1, 0, 0, -1, 1, 0, 0, 1, 0, -1, -

In [None]:
# Write the predictions to the configured output file as comma-separated
# values, without the DataFrame index column.
result_file = Output_File
results.to_csv(result_file, sep= ",", index = False)

In [None]:
# Final marker: every cell of the notebook has run.
print("done!")

done!
