# Master file with exp_name:

In [None]:
# Import the libraries used throughout the notebook:
import os
import pandas as pd
import pylab as pyl
import numpy as np
from scipy.interpolate import *
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Pretty dataframe rendering: inject the two local stylesheets into the notebook.
from IPython.core.display import HTML

# Read each stylesheet inside a `with` block so the file handles are closed
# deterministically instead of being left to the garbage collector.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)
HTML('<style>{}</style>'.format(css))

In [None]:
# Control variables:


# Location of the master dataset directory. The path is relative; it is
# appended to the user's home directory when the log path is built.
master_location = 'Dropbox/gits/data/ThOpt/clean_csv/'

# Individual master file names (currently disabled):
#file_name = 'ex4_EDData.csv'
#file_name_med = 'ex2_enDataSG_2T.csv'
#file_name_large = 'ex2_enDataSG_1T.csv'

# Functions:

In [None]:
def change_type(df, col, dtype):
    """Cast the given columns of *df* to new dtypes, in place.

    df    - data frame to modify (it is mutated and also returned)
    col   - list of column names to convert
    dtype - list of target types, one per entry in *col*

    When the two lists differ in length a message is printed and *df* is
    returned untouched.
    """
    if len(col) == len(dtype):
        for column_name, target_type in zip(col, dtype):
            converted = df[column_name].astype(target_type)
            df[column_name] = converted
    else:
        print("col and dtype must be equal")

    return df

In [None]:
def find_unique_transfers(df, args=None):
    """Return the distinct transfer requests found in *df*.

    df   - main dataframe
    args - column names that define uniqueness. Any iterable of names is
           accepted (list, tuple, ...); a single string is treated as one
           column. Defaults to file_size, number_of_files, bandwidth, rtt
           and buffer_size.

    Returns a list of tuples, one per distinct combination of values, in
    order of first appearance.
    """
    # None sentinel instead of a mutable list default shared across calls.
    if args is None:
        args = ['file_size', 'number_of_files', 'bandwidth', 'rtt', 'buffer_size']

    # pandas treats a tuple (or bare string) as ONE key, so always hand it
    # a real list of column names.
    columns = [args] if isinstance(args, str) else list(args)

    unique_rows = df[columns].drop_duplicates()
    return [tuple(row) for row in unique_rows.values]

In [None]:
def plot_runs_1D(df, bk_tr, runs, colors, sort_order, tick_step=32):
    """Plot throughput of the selected runs as a 1-D trajectory.

    df         - main dataframe; must provide the columns `background`,
                 `run_id`, `throughput` and every name in *sort_order*
    bk_tr      - list of background-traffic values to plot
    runs       - list of run ids to plot
    colors     - matplotlib format strings, one per (background, run) pair,
                 so len(colors) must equal len(bk_tr) * len(runs)
    sort_order - list of parameter columns used to order the rows and to
                 build the tick labels (for example ['p', 'cc', 'pp'])
    tick_step  - show one x tick label every `tick_step` points

    Prints a message and returns None when *colors* has the wrong length.
    """
    if len(colors) != (len(bk_tr) * len(runs)):
        print("length of colors must be equal to (bk_tr X runs)")
        return

    plt.figure(figsize=(35, 13))

    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    df = df.sort_values(list(sort_order))

    order_text = ', '.join(sort_order)
    plt.xlabel('Parameter order - ' + order_text, fontsize=40)
    plt.ylabel('Throughput (Gbps)', fontsize=40)
    plt.title('Throughput Trajectory(4) - Parameter order - ' + order_text,
              fontsize=50)

    color_count = 0
    for bk in bk_tr:
        bk_data = df[df.background == bk]
        for run in runs:
            run_data = bk_data[bk_data.run_id == run]
            num_rows = run_data.shape[0]

            # Build "a-b-c" labels as a standalone Series; writing a new
            # column into the filtered slice would trigger pandas'
            # SettingWithCopy warning.
            ticks = run_data[sort_order[0]].astype(str)
            for key in sort_order[1:]:
                ticks = ticks + '-' + run_data[key].astype(str)

            x_value = np.arange(1, num_rows + 1)
            y_value = np.array(run_data.throughput)
            tick_value = np.array(ticks)

            # Each series overrides the tick labels of the previous one.
            plt.xticks(x_value[::tick_step], tick_value[::tick_step])
            plt.plot(x_value, y_value, colors[color_count])
            color_count += 1

    plt.tick_params(axis='both', which='major', labelsize=30)
    plt.show()

# Read and merge data files:

In [None]:
# IPython shell escape: list the cleaned CSV logs (the path is written out by hand here).
!ls ~/Dropbox/gits/data/ThOpt/clean_csv/

In [None]:
# Required values:

# OS path separator (variable name kept unchanged so existing references still work):
file_seperator = os.sep

# User home directory with a trailing separator. expanduser('~') honours
# $HOME when it is set and still resolves a home directory when it is not,
# where os.environ['HOME'] would raise KeyError.
user_home = os.path.expanduser('~') + file_seperator

In [None]:
# Directory holding the cleaned CSV logs.
log_files_path = os.path.join(user_home, master_location)
# os.listdir() returns entries in arbitrary order; sort so the files are
# always processed in the same sequence.
list_of_logs = sorted(os.listdir(log_files_path))
list_of_logs

In [None]:
# Read every CSV log and stack them into one frame with a fresh 0..N-1 index.
file_list = []
for file_path in list_of_logs:
    # Skip anything that is not a CSV (hidden files, sub-directories, ...)
    # so a stray directory entry cannot break read_csv.
    if not file_path.lower().endswith('.csv'):
        continue
    print(file_path)
    url = os.path.join(log_files_path, file_path)
    file_list.append(pd.read_csv(url))
data = pd.concat(file_list, ignore_index=True)
data.shape

In [None]:
# See all the experiments (distinct values of the exp_name column):
data.exp_name.unique()

In [None]:
# Normalise the destination spelling: 'Gordon/oasis' and 'Gordon' both become 'Gordon/Oasis'.
data["destination"] = data["destination"].replace(['Gordon/oasis','Gordon'],'Gordon/Oasis')
data.head()

In [None]:
# Distinct transfer requests, using the function's default column set.
find_unique_transfers(data)

# Get exp_2 data:

In [None]:
# Take an independent copy of the selection: change_type() later assigns
# into this frame, and doing that on a bare slice of `data` raises pandas'
# SettingWithCopy warning.
exp_2 = data[data.exp_name == "exp_2_background_traffic"].copy()

In [None]:
# Keep only the small transfers (file_size below 10000000) whose
# destination is 'Gordon/Oasis', then plot them.
temp_exp2 = change_type(exp_2, ["file_size"], ["int"])
is_small = temp_exp2.file_size < 10000000
to_gordon = temp_exp2.destination == 'Gordon/Oasis'
exp_2_small = temp_exp2[is_small & to_gordon]

# Three background-traffic values x one run = three series, all drawn with
# the same 'ro' marker.
bk_traffic = [0, 1, 2]
runs = [2]
colors = ['ro'] * 3
sort_order = ['cc', 'p', 'pp']
plot_runs_1D(exp_2_small, bk_traffic, runs, colors, sort_order)

# Get all data and perform CNN:

In [None]:
# Work on a deep copy so the steps below leave `data` untouched.
temp_data = data.copy(deep=True)

In [None]:
# Columns that together identify one transfer request.
args = ['file_size', 'number_of_files', 'bandwidth', 'rtt', 'buffer_size',
        'source', 'destination', 'run_id', 'background']
unique_req = find_unique_transfers(temp_data, args)
# Reuse the list just computed instead of de-duplicating the frame a second time.
num_reqs = len(unique_req)

In [None]:
# Index the frame by the request-key columns so a request tuple can be looked up with .loc.
temp_data = temp_data.set_index(args)
temp_data.head()

## Make Ndarray for CNN:

In [None]:
# Turn every request into an (m+1) x (n+1) throughput grid indexed by
# [p, cc] and stack the grids into `mat`, shape (num_reqs, m+1, n+1).
# The extra row/column lets index values run from 0 up to and including m / n.
m = 32
n = 32
x = num_reqs

mat = np.zeros([x, m + 1, n + 1])
mat_yy = np.zeros(x)
for req_count, req in enumerate(unique_req):
    current_req = temp_data.loc[req].reset_index()

    # Cast to int explicitly: grid positions must be integers even when the
    # columns were read as floats.
    p_idx = current_req['p'].values.astype(int)
    cc_idx = current_req['cc'].values.astype(int)
    # On repeated (p, cc) pairs the last row wins, as in a row-by-row fill.
    mat[req_count, p_idx, cc_idx] = current_req['throughput'].values

    # Sum only THIS request's grid. The previous shared scratch matrix was
    # never cleared, so cells written by earlier requests leaked into later sums.
    mat_yy[req_count] = mat[req_count].sum()
    

In [None]:
# Display the per-request throughput sums.
mat_yy

In [None]:
# Cut points of the throughput sums at the 20th, 40th, 50th, 60th and 80th
# percentiles, computed in a single call.
low, low_high, mid, mid_high, high = np.percentile(mat_yy, [20, 40, 50, 60, 80])
#profile = [low,low_high,mid,mid_high,high]

In [None]:
def convert_to_discrete_traffic(x, high, mid_high, mid, low_high, low):
    """Bucket each value of *x* into a discrete class label 0-5.

    x        - sequence / array of values to classify
    high     - values >= high              -> 5
    mid_high - values >= mid_high (< high) -> 4
    mid      - values >= mid               -> 3
    low_high - values >= low_high          -> 2
    low      - values >= low               -> 1
               everything else (including NaN) -> 0

    Returns a NEW numpy array with the same shape and dtype as np.array(x);
    the input is left unmodified (it used to be overwritten in place because
    the result was just another name for the argument).
    """
    values = np.array(x)  # private copy, so the caller's data is never touched
    labels = np.zeros_like(values)

    # Apply thresholds from lowest to highest: a later (higher) threshold
    # overwrites an earlier one, so each value ends up with the label of the
    # highest threshold it reaches.
    thresholds = (low, low_high, mid, mid_high, high)
    for label, threshold in enumerate(thresholds, start=1):
        labels[values >= threshold] = label
    return labels

In [None]:
# Map each request's throughput sum to a class label (0-5) using the percentile cut points.
mat_y = convert_to_discrete_traffic(mat_yy,high,mid_high,mid,low_high,low)
mat_y

# Core CNN code:

In [None]:

import theano 
import warnings

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist

In [None]:
# Split data for training and testing.
# One shuffled permutation is cut in two, so no sample is used twice and none
# appears in both sets. Two independent randint() draws sample with
# replacement, which leaks training samples into the test set.
# The 5/6 : 1/6 ratio reproduces the earlier 220 / 44 split for 264 samples.
num_samples = mat.shape[0]
shuffled_idx = np.random.permutation(num_samples)
num_train = (num_samples * 5) // 6
training_idx = shuffled_idx[:num_train]
test_idx = shuffled_idx[num_train:]
X_train, X_test = mat[training_idx, :], mat[test_idx, :]
y_train = mat_y[training_idx]
y_test = mat_y[test_idx]

In [None]:
# Display the shape of the training array.
X_train.shape

In [None]:
# Insert a single-channel axis -> (samples, 1, 33, 33), cast to float32 and
# divide every value by 255.
# NOTE(review): 255 looks like an image-pixel scale factor - confirm it is the
# intended normalisation for throughput values.
X_train = X_train.reshape((X_train.shape[0], 1, 33, 33)).astype('float32') / 255
X_test = X_test.reshape((X_test.shape[0], 1, 33, 33)).astype('float32') / 255

In [None]:
# One-hot encode the class labels into 12 columns; this width has to match
# the size of the model's final Dense layer.
# NOTE(review): the labels produced earlier only take values 0-5 - confirm 12 is intended.
Y_train = np_utils.to_categorical(y_train, 12)
Y_test = np_utils.to_categorical(y_test, 12)

In [None]:
# 7. Define model architecture
# The input is channels-first (1, 33, 33). dim_ordering='th' is therefore set
# on EVERY convolution / pooling layer; with it only on the first layer, the
# later layers fall back to the backend's default ordering and may read the
# 32 feature maps as a spatial axis.
model = Sequential()
model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1, 33, 33),
                        dim_ordering='th'))
model.add(Convolution2D(32, 3, 3, activation='relu', dim_ordering='th'))
model.add(Convolution2D(32, 3, 3, activation='relu', dim_ordering='th'))
model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))
model.add(Dropout(0.25))

# Classifier head: flatten -> 128 hidden units -> 12-way softmax.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(12, activation='softmax'))
model.summary()

In [None]:
# 8. Compile model: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# 9. Fit model on training data (30 epochs, batches of 32).
# No validation data is passed, so only training metrics are recorded.
history = model.fit(X_train, Y_train, 
          batch_size=32, nb_epoch=30, verbose=1)

In [None]:
# Display the object returned by fit().
history

In [None]:
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# Only the training curve is drawn, so the legend names just that one line
# (a 'test' entry would label a curve that does not exist).
plt.legend(['train'], loc='upper left')
plt.show()


In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# Only the training curve is drawn, so the legend names just that one line
# (a 'test' entry would label a curve that does not exist).
plt.legend(['train'], loc='upper right')
plt.show()

In [None]:
# 10. Evaluate model on test data (silent mode).
score = model.evaluate(X_test, Y_test, verbose=0)

In [None]:
# Display the evaluation result; the model was compiled with the accuracy
# metric, so this presumably holds [loss, accuracy] - verify against the Keras version in use.
score

In [None]:
# Shuffle all rows: sample(frac=1) returns every row in random order (no seed is set).
suffled_data = data.sample(frac=1)

In [None]:
# reset_index() returns a new frame; assign it back, otherwise the shuffled
# frame keeps its old row labels and the call has no effect.
suffled_data = suffled_data.reset_index(drop=True)
suffled_data

In [None]:
# Drop the run_id column from the shuffled frame.
del suffled_data["run_id"]

In [None]:
# Drop the "fast" column as well.
del suffled_data["fast"]

In [None]:
# Preview the remaining columns.
suffled_data.head()

In [None]:
# Write the shuffled frame to CSV in the working directory, without the index column.
suffled_data.to_csv("logs_mixed_traffic_3runs.csv",index=False)