In [1]:
from csv import reader
from numpy import array, vstack, float_, copy, abs, mean
from os import listdir, path
from re import match
from math import inf, fabs, floor, pow
from Network import NetworkGraph, NetworkFlow
from PathAssigning import MEPT
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
def extract_NetworkGraph(
    param_File
):
    NetworkGraphFile = param_File
    Graph = NetworkGraph()

    Line = NetworkGraphFile.readline()
    while Line:
        if Line.startswith("NODES"):
            break
        Line = NetworkGraphFile.readline()
    Line = NetworkGraphFile.readline()
    while False if match(r"^\)\s+$", Line) else True:
        Node = Line.strip().split(' ')[0]
        Graph.add_Node(Node)
        Line = NetworkGraphFile.readline()

    Line = NetworkGraphFile.readline()
    while Line:
        if Line.startswith("LINKS"):
            break
        Line = NetworkGraphFile.readline()
    Line = NetworkGraphFile.readline()
    while False if match(r"^\)\s+$", Line) else True:
        Splits = Line.strip().split(' ')
        Node1, Node2 = Splits[2], Splits[3]
        Bandwidth = float(Splits[5])
        Graph.add_Link(Node1, Node2, Bandwidth)
        Line = NetworkGraphFile.readline()

    return Graph

In [3]:
def get_FlowsList(
    param_Nodes,
    param_FlowTable
):
    Nodes = param_Nodes
    DimensionSize = len(Nodes)
    FlowTable = param_FlowTable
    FlowsList = []
    
    for Record in FlowTable:
        Record = Record.reshape(DimensionSize, DimensionSize)
        Flows = []
        for _index_Src in range(DimensionSize):
            for _index_Dst in range(DimensionSize):
                if _index_Src == _index_Dst:
                    continue
                SrcNode = Nodes[_index_Src]
                DstNode = Nodes[_index_Dst]
                FlowRate = Record[_index_Src][_index_Dst]
                Flow = NetworkFlow(SrcNode, DstNode, FlowRate)
                Flows.append(Flow)
        FlowsList.append(Flows)
        
    return FlowsList

In [4]:
def retrieve_DataInstance(
    param_CSV
):
    CSVReader = reader(param_CSV, delimiter = ',')
    Data = list(CSVReader)
    
    YInstance = array(Data.pop(0))
    YInstance = YInstance.astype(float_)
    XInstance = array(Data).reshape(-1)
    XInstance = XInstance.astype(float_)
    
    return XInstance, YInstance

In [5]:
def calc_Accuracy(
    param_ExpectedSet,
    param_PredictedSet
):
    ExpectedSet, PredictedSet = param_ExpectedSet, param_PredictedSet

    Accuracies = (100 * (1 - abs(ExpectedSet - PredictedSet) / ExpectedSet)).transpose()
    UminAccuracy = mean(Accuracies[0])
    UmaxAccuracy = mean(Accuracies[1])

    return UminAccuracy, UmaxAccuracy

In [6]:
def calc_EE(
    param_NetworkGraph
):
    Graph = param_NetworkGraph
    
    Links = Graph.get_Links()
    _numerator, _denominator = 0, len(Links)
    for Link in Links.values():
        State = Link[2]
        if State is True:
            _numerator += 1
    EE = 1 - _numerator / _denominator
    
    return EE

In [7]:
# Graph initiating
NetworkGraphFile = open("../public/data/raw_data/network-graph.txt", 'r')
Graph = extract_NetworkGraph(NetworkGraphFile)
Nodes = Graph.get_Nodes()

# Data retrieving
X = []
Y = []
for DataFile in listdir("../public/data/processed_data"):
    CSVFile = open(path.join("../public/data/processed_data", DataFile), 'r')
    XInstance, YInstance = retrieve_DataInstance(CSVFile)
    CSVFile.close()
    X.append(XInstance)   
    Y.append(YInstance)
X = vstack(X)
Y = vstack(Y)

#Sampling
XTrainValidate, XTest, YTrainValidate, YTest = train_test_split(X, Y, test_size = 0.2)
FlowsTest = get_FlowsList(Nodes, XTest)
    
# 10-Fold PCA cross-validating
Size = int(pow(len(Nodes), 2))
_size = Size
Difference = inf
ReducedSizeSet = [*range(1, 36)]
for _Size in ReducedSizeSet: 
    _XTrainValidate = copy(XTrainValidate)
    PCATransformer = PCA(n_components = _Size)
    PCATransformer.fit(_XTrainValidate)
    _XTrainValidate = PCATransformer.transform(_XTrainValidate)
    KFoldSampler = KFold(n_splits = 10, shuffle = True, random_state = 1801)
    Accuracies = []
    for _index_Train, _index_Validate in KFoldSampler.split(_XTrainValidate, YTrainValidate):
        XTrain = _XTrainValidate[_index_Train]
        YTrain = YTrainValidate[_index_Train]
        XValidate = _XTrainValidate[_index_Validate]
        YValidate = YTrainValidate[_index_Validate]
        Model = LinearRegression()
        Model.fit(XTrain, YTrain)
        YPredict = Model.predict(XValidate)
        UminAccuracy, UmaxAccuracy = calc_Accuracy(YValidate, YPredict)
        Accuracy = (UminAccuracy + UmaxAccuracy) / 2
        Accuracies.append(Accuracy)
    MeanAccuracy = sum(Accuracies) / len(Accuracies)
    ReductionRate = 100 * (1 - _Size / _size)
    _Difference = fabs(MeanAccuracy - ReductionRate)
    if _Difference <= Difference:
        Size = _Size
        Difference = _Difference
Size = floor(Size * 1.05)

# Linear Regression
PCATransformer = PCA(n_components = Size)
PCATransformer.fit(XTrainValidate)
XTrainValidate = PCATransformer.transform(XTrainValidate)
PCATransformer = PCA(n_components = Size)
PCATransformer.fit(XTest)
XTest = PCATransformer.transform(XTest)
Model = LinearRegression()
Model.fit(XTrainValidate, YTrainValidate)
YPredict = Model.predict(XTest)

# Evaluating
UminAccuracy, UmaxAccuracy = calc_Accuracy(YTest, YPredict)
EEs = []
for _index in range(200):
    Flows = FlowsTest[_index]
    Umin, Umax = YPredict[_index][0], YPredict[_index][1]
    Graph.reset_Network()
    MEPT(Graph, Flows, Umin, Umax)
    EE = calc_EE(Graph)
    EEs.append(EE)
MeanEE = sum(EEs) / len(EEs) * 100

# Result displaying
print("Number of samples: 1000")
print(f"PCA to reduce from 36 features: {Size} features")
print(f"UminAccuracy: {'{:.2f}'.format(round(UminAccuracy, 2))}%")
print(f"UmaxAccuracy: {'{:.2f}'.format(round(UmaxAccuracy, 2))}%")
print(f"Mean energy saving rate: {'{:.2f}'.format(round(MeanEE, 2))}%")

Number of samples: 1000
PCA to reduce from 36 features: 5 features
UminAccuracy: 82.34%
UmaxAccuracy: 91.67%
Mean energy saving rate: 10.80%
