# DAN Extrinsic Evaluation (I)
## Creating possible DAN Combinations

In [1]:
import sys
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import time
sys.path.append(os.path.abspath("../"))
from rs_helper import DAN

## Reading Data

In [2]:
# Root folder of the cleaned corpus, relative to the notebook's working directory.
# read_data() labels every text file with the name of the sub-folder it lives in.
data_dir = "../datasets/final_datasets/clean_dataset"

In [3]:
def read_data(_dir:str) -> pd.DataFrame:
    """Collect all text files below ``_dir`` into a labelled DataFrame.

    Every directory found while walking ``_dir`` is scanned for files whose
    name ends in ``.txt`` or ``.TXT``. Each such file becomes one row whose
    label is the name of the directory that directly contains it. Files that
    sit directly in ``_dir`` itself (not in a sub-directory) are not read.

    Args:
        _dir: Root directory of the dataset.

    Returns:
        A DataFrame with the columns ``text`` (full file content) and
        ``class`` (name of the containing directory). The frame is empty,
        but still has both columns, when no matching file is found.
    """
    texts = []
    labels = []
    for root, dirs, _files in os.walk(_dir):
        # Use a dedicated loop name so the ``_dir`` argument is not shadowed.
        for class_dir in dirs:
            class_path = os.path.join(root, class_dir)
            for entry in os.listdir(class_path):
                if not entry.endswith((".txt", ".TXT")):
                    continue
                file_name = os.path.abspath(os.path.join(class_path, entry))
                # A directory may also be named "*.txt"; only regular files can be read.
                if not os.path.isfile(file_name):
                    continue
                # The context manager closes the handle even if read() raises.
                with open(file_name, "r") as handle:
                    texts.append(handle.read())
                # Class name = dir name
                labels.append(class_dir)
    return pd.DataFrame({"text": texts, "class": labels})

In [4]:
# Load the corpus and shuffle all rows (frac=1 keeps every row).
# A fixed random_state makes the shuffled order identical on every run, so
# training sessions resumed after a kernel restart see the same row order.
df = read_data(data_dir).sample(frac=1, random_state=42)

In [5]:
# Preview the first rows of the shuffled frame (columns: text, class).
df.head()

Unnamed: 0,text,class
4786,introduction clustering is an unsupervised lea...,clustering
5278,clustering problem clustering is a distributio...,clustering
1897,thus the model is extrapolated and the outlier...,prediction
4325,clustering is a popular approach to study the ...,clustering
723,thus crossdomain sentiment classification is p...,prediction


## FastText Model

In [6]:
from rs_helper import FastText
import itertools

In [7]:
# Instantiate the project's FastText wrapper (imported from rs_helper).
# Per the log output of this cell, construction loads the FastText model.
ft_model = FastText()

INFO:tensorflow:FastText Model is loading


INFO:tensorflow:FastText Model is loading


INFO:tensorflow:FastText Model loaded!


INFO:tensorflow:FastText Model loaded!


In [8]:
# Hyper-parameter grid for the DAN models. The grid is kept under its own name
# so that `combinations` is only ever bound to the final list; re-running this
# cell is therefore safe (previously the dict was overwritten by the list and a
# second run of the product step failed).
param_grid = {
    "num_layer": np.arange(2, 8),
    "classifier_shape": [[200], [100, 50], [200, 100]],
    "epoches": [5, 10, 20],
    "drop_prob": [0.2, 0.3, 0.5],
}

# Cartesian product in the fixed order
# (num_layer, classifier_shape, epoches, drop_prob).
# The position of a tuple in this list is the index used to name saved models,
# so the order must not change.
combinations = list(itertools.product(
    param_grid["num_layer"],
    param_grid["classifier_shape"],
    param_grid["epoches"],
    param_grid["drop_prob"],
))

In [10]:
# Grid size: 6 layer counts x 3 classifier shapes x 3 epoch settings x 3 dropout rates.
len(combinations)

162

In [11]:
# Still missing combinations
#combinations[45:]

In [12]:
%%time
tf.logging.set_verbosity(tf.logging.ERROR)
for i,x in enumerate(combinations[51:]):
    i+=51
    t1 = time.time()
    tf.reset_default_graph()
    model_dir = os.path.join("DAN_COMBINATIONS", "model_layer{}".format(i))
    _DAN = DAN(ft_model)
    _DAN.train(text=df["text"].tolist(),
               labels=df["class"].tolist(),
               save_path = model_dir,
               model_name = "DAN_{}".format(i),
               epoches = x[2],
               num_hidden_layer = x[0],
               wdrop_prob = x[3],
               classifier_shape = x[1]
              )
    t2 = time.time()
    _DAN.save_config_json(os.path.join(model_dir, "config.json"), time=(t2-t1)/60)
    print("Model_{} trained and saved to {}".format(i, model_dir))

Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer51.
Model_51 trained and saved to DAN_COMBINATIONS/model_layer51
Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer52.
Model_52 trained and saved to DAN_COMBINATIONS/model_layer52
Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer53.
Model_53 trained and saved to DAN_COMBINATIONS/model_layer53
Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer54.
Model_54 trained and saved to DAN_COMBINATIONS/model_layer54
Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer55.
Model_55 trained and saved to DAN_COMBINATIONS/model_layer55
Model saved to /Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer56.
Model_56 trained and saved to DAN_COMBINATIONS/model_layer5

## Running other Evaluation Tasks

In [13]:
# Execute the follow-up evaluation notebook through IPython's %run magic.
%run Eval_DAN_2.ipynb

{'epoches': '5', 'dropout': '0.3', 'path': '/Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer127/DAN_127', 'classes': '3', 'classifier_shape': '[200, 100]', 'hidden_layer': '6', 'time': '1.7953984538714092', 'insampling_linear_kernel': 0.6484765802637562, 'test_linear_kernel': 0.6753069577080492, 'valid_score_linear_kernel': 0.5166666666666667, 'insampling_rbf_kernel': 0.5402455661664393, 'test_rbf_kernel': 0.572987721691678, 'valid_score_rbf_kernel': 0.38333333333333336}
1 of 162
{'epoches': '5', 'dropout': '0.3', 'path': '/Users/Daniel/PycharmProjects/Recommender-System/notebooks/DAN_COMBINATIONS/model_layer118/DAN_118', 'classes': '3', 'classifier_shape': '[100, 50]', 'hidden_layer': '6', 'time': '1.6404570817947388', 'insampling_linear_kernel': 0.6160375928452326, 'test_linear_kernel': 0.6152796725784447, 'valid_score_linear_kernel': 0.38333333333333336, 'insampling_rbf_kernel': 0.4342883128694861, 'test_rbf_kernel': 0.44747612551159616, 'valid

In [None]:
# Enable matplotlib's interactive GUI backend (no backend named, so IPython
# picks the default), then execute the third evaluation notebook.
%matplotlib
%run Eval_DAN_3.ipynb

Using matplotlib backend: TkAgg
