## Convert Cisco Data to NPZ for DG

In [1]:
import pandas as pd
import sys
import os
import pickle
import numpy as np
from numpy import load
import matplotlib.pyplot as plt
from statsmodels.graphics import tsaplots

In [2]:
def autonorm(vector_in, min_range, max_range):
    """Linearly rescale a 1-D vector so its values span [min_range, max_range].

    Parameters
    ----------
    vector_in : array-like of numbers
        Values to rescale. Converted to a float NumPy array internally.
    min_range, max_range : number
        Lower and upper bound of the target interval.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``vector_in``. The smallest input
        maps to ``min_range`` and the largest to ``max_range``.

    Notes
    -----
    * A constant input has no spread to stretch, so every element is mapped
      to the midpoint of the target interval (0 for the [-1, 1] range).
    * An empty input is returned as an empty float array.
    """
    values = np.asarray(vector_in, dtype=float)
    if values.size == 0:
        return values.copy()

    lowest = values.min()
    spread = values.max() - lowest
    if spread == 0:
        # Constant vector: dividing by the spread would be 0/0, so place
        # every element at the centre of the target interval instead.
        return np.full(values.shape, np.mean([min_range, max_range]))

    target_width = max_range - min_range
    return ((values - lowest) * target_width) / spread + min_range

In [3]:
# Read the Cisco data CSV from the working directory.
# NOTE(review): the recorded output shows pandas emitting a warning for this
# call (message truncated) -- check whether explicit dtypes are needed.
cisco_data = pd.read_csv("bgpclear.csv")

# Distinct values of the "name" column, in order of first appearance.
unique_names = cisco_data["name"].unique()

  cisco_data = pd.read_csv("bgpclear.csv")


In [4]:
grouped_data = cisco_data.groupby("name")

# NOTE: `dict` shadows the Python builtin. The name is kept because later
# cells look tables up through it (dict["information"], dict["process-info"]).
dict = {}

for full_name in unique_names:
    # Key each table by the last '/'-separated segment of its full name.
    key = str(full_name.split('/', 100)[-1])
    rows = grouped_data.get_group(full_name)
    # Drop columns that are entirely empty for this group first, then drop
    # any row that still has a missing value.
    dict[key] = rows.dropna(axis=1, how='all').dropna()

Each producer will be one-hot encoded, e.g. dr01 is [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0].

All features are normalized to [-1, 1].

## Actually, we only care about these:
Features to explore according to paper:

		"active-routes-count",
		"as",
		"backup-routes-count",
		"deleted-routes-count",
		"paths-count",
		"protocol-route-memory",
		"routes-counts",
		"global__established-neighbors-count-total",
		"global__neighbors-count-total",
		"global__nexthop-count",
		"global__restart-count",
		"performance-statistics__global__configuration-items-processed",
		"performance-statistics__global__ipv4rib-server__rib-connection-up-count",
		"performance-statistics__vrf__inbound-update-messages",
		"vrf__neighbors-count",
		"vrf__network-count",
		"vrf__path-count",
		"vrf__update-messages-received"
        
Producers to explore according to paper:

		"leaf1", "leaf2", "leaf3",
		"leaf5", "leaf6", "leaf7", "leaf8",
		"spine1", "spine2", "spine3", "spine4"

We only care about the names 'information' and 'process-info'.

In [None]:
# Display the table stored under the "information" key.
dict["information"]

In [None]:
# Display the table stored under the "process-info" key.
dict["process-info"]

## How to Set Up DF?
Since we don't care about time or name, only the producer metadata, we can simplify the DataFrame: first parse each relevant dataset down to the features we care about, then combine the two, separated by producer.

In [5]:
# Producers to keep. Note that the leaf numbering skips 4.
unique_producers = [f"leaf{n}" for n in (1, 2, 3, 5, 6, 7, 8)] + [
    f"spine{n}" for n in (1, 2, 3, 4)
]

# Feature columns that live in the "information" table.
_info_features = [
    "active-routes-count",
    "as",
    "backup-routes-count",
    "deleted-routes-count",
    "paths-count",
    "protocol-route-memory",
    "routes-counts",
]

# Feature columns that live in the "process-info" table.
_process_features = [
    "global__established-neighbors-count-total",
    "global__neighbors-count-total",
    "global__nexthop-count",
    "global__restart-count",
    "performance-statistics__global__configuration-items-processed",
    "performance-statistics__global__ipv4rib-server__rib-connection-up-count",
    "performance-statistics__vrf__inbound-update-messages",
    "vrf__neighbors-count",
    "vrf__network-count",
    "vrf__path-count",
    "vrf__update-messages-received",
]

# The order here fixes the column order of the feature arrays built later.
features = _info_features + _process_features

In [6]:
# Restrict the "information" table to the producers of interest.
# reset_index() keeps the previous row labels as an "index" column.
_info_all = dict["information"]
_info_wanted = _info_all['Producer'].isin(unique_producers)
info_df = _info_all[_info_wanted].reset_index()
info_df

Unnamed: 0,index,name,time,EncodingPath,Producer,active-routes-count,af-name,as,backup-routes-count,deleted-routes-count,paths-count,protocol-route-memory,route-table-name,routes-counts,saf-name,vrf-name
0,702423,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498754072913000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf2,162,IPv4,65022,1,0,925,124464,default,163,Unicast,default
1,702424,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498754073044000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,spine3,152,IPv4,65013,0,0,1152,148480,default,152,Unicast,default
2,702425,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498754073111000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf8,164,IPv4,65028,0,0,957,128176,default,164,Unicast,default
3,702426,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498754073432000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf1,165,IPv4,65021,1,0,982,131232,default,166,Unicast,default
4,702427,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498754073573000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf6,164,IPv4,65026,0,0,980,130752,default,164,Unicast,default
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7372,711050,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498757070667000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf3,164,IPv4,65023,0,0,947,127056,default,164,Unicast,default
7373,711051,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498757071498000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf1,165,IPv4,65021,1,0,982,131232,default,166,Unicast,default
7374,711052,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498757071716000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf5,165,IPv4,65025,0,0,980,130880,default,165,Unicast,default
7375,711053,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,1498757072242000000,Cisco-IOS-XR-ip-rib-ipv4-oper:rib/vrfs/vrf/afs...,leaf7,164,IPv4,65027,0,0,967,129296,default,164,Unicast,default


In [7]:
# Restrict the "process-info" table to the producers of interest.
# reset_index() keeps the previous row labels as an "index" column.
_process_all = dict["process-info"]
_process_wanted = _process_all['Producer'].isin(unique_producers)
process_df = _process_all[_process_wanted].reset_index()
process_df

Unnamed: 0,index,name,time,EncodingPath,Producer,global__established-neighbors-count-total,global__neighbors-count-total,global__nexthop-count,global__restart-count,instance-name,performance-statistics__global__configuration-items-processed,performance-statistics__global__ipv4rib-server__is-rib-connection-up,performance-statistics__global__ipv4rib-server__rib-connection-up-count,performance-statistics__vrf__inbound-update-messages,vrf-name,vrf__neighbors-count,vrf__network-count,vrf__path-count,vrf__update-messages-received
0,711057,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073016000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf2,37,39,151,2,default,2,true,1,67221,default,39,352,5857,67221
1,711058,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073189000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,spine3,56,64,231,2,default,14,true,1,63498,default,64,352,8353,63498
2,711059,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073223000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf8,39,40,147,2,default,3,true,1,67186,default,40,352,5937,67186
3,711060,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073547000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,default,5,true,1,69236,default,38,352,4769,69236
4,711061,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073726000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf6,38,38,145,2,default,2,true,1,68497,default,38,352,6213,68497
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7372,719684,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757070778000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf3,37,39,151,2,default,2,true,1,76731,default,39,352,5990,76731
7373,719685,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757071684000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,default,6,true,1,72822,default,38,352,4687,72822
7374,719686,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757071851000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf5,37,37,139,2,default,2,true,1,69748,default,37,352,5297,69748
7375,719687,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757072352000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf7,38,38,145,2,default,2,true,1,71863,default,38,352,6213,71863


In [8]:
# Preview for a single producer ("leaf1"): process-info columns on the left,
# information columns on the right. concat(axis=1) pairs rows by their
# freshly reset 0..n-1 index.
_proc_is_leaf1 = process_df['Producer'] == "leaf1"
_info_is_leaf1 = info_df['Producer'] == "leaf1"
leaf1_proc = process_df.loc[_proc_is_leaf1].reset_index()
leaf1_info = info_df.loc[_info_is_leaf1].reset_index()
combined = pd.concat((leaf1_proc, leaf1_info), axis=1)
combined

Unnamed: 0,level_0,index,name,time,EncodingPath,Producer,global__established-neighbors-count-total,global__neighbors-count-total,global__nexthop-count,global__restart-count,...,af-name,as,backup-routes-count,deleted-routes-count,paths-count,protocol-route-memory,route-table-name,routes-counts,saf-name,vrf-name
0,3,711060,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754073547000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
1,14,711073,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754078037000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
2,25,711085,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754082520000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
3,36,711098,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754086993000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
4,47,711110,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498754091502000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,7329,719635,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757053569000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
663,7340,719648,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757058135000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
664,7351,719660,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757062749000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default
665,7362,719673,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,1498757067147000000,Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/insta...,leaf1,38,38,140,2,...,IPv4,65021,1,0,982,131232,default,166,Unicast,default


In [None]:
# Number of "process-info" rows per producer.
process_df['Producer'].value_counts()

In [None]:
# Number of "information" rows per producer.
info_df['Producer'].value_counts()

In [9]:
# Build the padded per-producer feature tensor and its generation flags.
#
#   complete_array[p, t, f]  normalised value of feature f at row t of producer p
#   flag[p, t]               1 where row t holds real data, 0 where it is padding
#   padding_hist[p]          number of padding rows appended for producer p
#
# Sequences shorter than MAX_SEQ_LEN are zero-padded at the end.
MAX_SEQ_LEN = 678  # longest per-producer sequence in this dataset
n_producers = len(unique_producers)
n_features = len(features)

complete_array = np.zeros([n_producers, MAX_SEQ_LEN, n_features])
padding_hist = np.zeros([n_producers])
flag = np.zeros([n_producers, MAX_SEQ_LEN])
print(complete_array.shape[1])

for idx, producer in enumerate(unique_producers):
    # Rows of each table that belong to this producer, re-indexed 0..n-1 so
    # that the column-wise concat below pairs them up by position.
    # NOTE(review): this assumes the i-th "information" row and the i-th
    # "process-info" row of a producer belong together -- confirm.
    prod_info_df = info_df[info_df['Producer'] == producer].reset_index(drop=True)
    prod_process_df = process_df[process_df['Producer'] == producer].reset_index(drop=True)
    combined = pd.concat([prod_info_df, prod_process_df], axis=1)[features]

    # Builtin `float` replaces the deprecated `np.float` alias; the result
    # is the same float64 array.
    values = combined.to_numpy().astype(float)

    # Normalise every feature column independently to [-1, 1].
    normed = np.empty_like(values)
    for col in range(values.shape[1]):
        normed[:, col] = autonorm(values[:, col], -1, 1)

    seq_len = len(normed)
    print(seq_len)

    n_pad = MAX_SEQ_LEN - seq_len
    if n_pad < 0:
        raise ValueError(
            f"{producer} has {seq_len} rows, more than MAX_SEQ_LEN={MAX_SEQ_LEN}"
        )

    # The arrays start out as zeros, so writing only the leading rows is
    # the same as zero-padding the tail.
    complete_array[idx, :seq_len] = normed
    flag[idx, :seq_len] = 1
    padding_hist[idx] = n_pad
    print(f"Padding {producer} with {n_pad}")
    print(" ")

678
667
Padding leaf1 with 11
 
673
Padding leaf2 with 5
 
672
Padding leaf3 with 6
 
675
Padding leaf5 with 3
 
673
Padding leaf6 with 5
 
678
Padding leaf7 with 0
 
677
Padding leaf8 with 1
 
665
Padding spine1 with 13
 
664
Padding spine2 with 14
 
668
Padding spine3 with 10
 
665
Padding spine4 with 13
 


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  comb_numpy = comb_numpy.astype(np.float)
Deprecated

In [11]:
# Peek at the padded feature matrix of the first producer.
x = complete_array[0]
x

array([[ 1.        ,  0.        ,  1.        , ...,  1.        ,
         0.45875096, -1.        ],
       [ 1.        ,  0.        ,  1.        , ...,  1.        ,
         0.45875096, -1.        ],
       [ 1.        ,  0.        ,  1.        , ...,  1.        ,
         0.45875096, -1.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

# We have feat and flag, now need attr

In [12]:
# One-hot producer attributes: row i carries a single 1 in column i, i.e. an
# identity matrix with one row per entry of unique_producers.
attr = np.eye(len(unique_producers))

In [None]:
# Arrays to persist, keyed by the names they are stored under in the archive.
_npz_arrays = {
    "data_gen_flag": flag,
    "data_feature": complete_array,
    "data_attribute": attr,
}
np.savez("data_train", **_npz_arrays)

## Pickle gan.output.Output

In [13]:
# Directory that contains the `gan` package imported below. Set the
# DOPPELGANGER_DIR environment variable to point at your own checkout; the
# fallback is the path this notebook was originally written with.
doppelganger_dir = os.environ.get(
    "DOPPELGANGER_DIR", r"C:\Users\max\Anaconda\Research\DoppelGANger"
)
if doppelganger_dir not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(doppelganger_dir)

from gan.load_data import load_data
from gan import output

# Also register the module under the bare name "output".
# NOTE(review): presumably so code or pickles that refer to a top-level
# `output` module resolve to this one -- confirm against the DoppelGANger loader.
sys.modules["output"] = output

Messing around with parameters.

In [None]:
# Describe the attribute vector: one DISCRETE output whose dimension is the
# number of producers (matches the one-hot `attr` matrix).
attribute_outputs = [
    output.Output(output.OutputType.DISCRETE, len(unique_producers), None, False)
]
# `with` closes the file even if pickling raises.
with open('data_attribute_output.pkl', 'wb') as file:
    pickle.dump(attribute_outputs, file)

# Describe the feature vector: one 1-dimensional output per feature column.
# NOTE(review): the features were scaled to [-1, 1] earlier, yet each is
# declared DISCRETE here -- confirm this is intended (the notebook says the
# parameters were being experimented with).
feature_outputs = [
    output.Output(output.OutputType.DISCRETE, 1, None, False)
    for _ in range(len(features))
]
with open('data_feature_output.pkl', 'wb') as file:
    pickle.dump(feature_outputs, file)