In [1]:
# see github.com/smaroukis/site-analysis
# don't forget double (()) 's for numpy operations and stacking

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy.matlib as M
from numpy.matlib import rand,zeros,ones,empty,eye
np.set_printoptions(precision=3)
%matplotlib inline
plt.style.use('ggplot')

### Importing Raw Data and Setup

In [None]:
# Load the real site data.
# CSV layout: one row per site, one column per criterion.
raw = np.genfromtxt('site_data_raw.csv', delimiter=',')

# Site names, listed in order (they are not stored in the .csv).
site_list = [
    'Saginaw Malleable',
    'Pontiac North Industrial',
    'Linden Road',
    'Bay City Powertrain',
    'Coldwater Road',
    'Davison Road',
    'Greenpoint',
    'Hemphill',
    'Saginaw Malleable Peninsula',
    'Van Buren',
    'Buick City',
    'Flint West (Kettering)',
    'Danville',
    'Allison Gas Turbine',
    'Framingham',
    'Massena',
    'Salina',
    'Tonawanda',
    'Elyria',
    'Toledo',
]

In [2]:
# Synthetic inputs for testing (plain ndarrays, no matrix objects).
# Column vector of ones, shape (10, 1)
onemat = np.ones((10, 1))
# 10x10 array whose k-th column is filled with the value k (1s, 2s, 3s, ...)
smpat = np.hstack([k * onemat for k in range(1, 11)])
# Zero-filled placeholder for the results
results = np.zeros((10, 10))

In [None]:
# Weights for the actual analysis.
# Criterion weights are given in percent and converted to fractions.
arr = np.array([20., 16., 15., 15., 8., 5., 5., 5., 4., 4., 3.]) / 100
# 11x11 identity, kept as a plain ndarray
eyes = np.identity(arr.size)
# Diagonal weight matrix: np.dot(A, wgts) scales column j of A by arr[j]
wgts = np.diag(arr)

### N=100 Random Normal Perturbations

In [None]:


N = 101    # number of noisy copies of `raw` generated below
SIGMA = 1  # standard deviation of the normal noise added to each entry
SEED = 0   # fixed seed so the saved results can be regenerated exactly

np.random.seed(SEED)

# Random normal perturbation = mu + sigma * randn, with mu the raw value.
# All N copies are drawn in one vectorized call, then clamped:
# if < 0 set to 0, if > 10 set to 10.
noise = SIGMA * np.random.standard_normal((N,) + raw.shape)
perturbed = np.clip(raw + noise, 0, 10)

# Stack once along a third axis (instead of re-stacking inside a loop).
# Slice 0 is the unperturbed `raw`; slices 1..N are the noisy copies,
# so the k-th simulation is results[:, :, k].
results = np.dstack([raw] + list(perturbed))

# Check shape and save (np.save appends the '.npy' extension)
assert results.shape == raw.shape + (N + 1,), results.shape
np.save('results', results)

### Analysis Part I

In [None]:
# Import Results
# Replaces `results` with the array stored in 'results.npy' (read from the
# current working directory).
results = np.load('results.npy')

In [None]:
# Score (on 1-10)
# One Series per simulation, indexed by site name: each site's criterion
# values are weighted by `wgts` and summed across criteria.
score_list = [
    pd.Series(data=np.sum(np.dot(results[:, :, i], wgts), axis=1), index=list(site_list))
    for i in range(N)
]
scores = pd.DataFrame(score_list)

# Ranking (on 1-#Sites)
# Derived from the scores already computed above rather than recomputing the
# weighted sums; ascending=False makes rank 1 the highest-scoring site.
rank_list = [site_scores.rank(ascending=False) for site_scores in score_list]
ranks = pd.DataFrame(rank_list)

In [None]:
# Plotting
# Horizontal box plot of the SCORE distribution, one box per column of `scores`.
fig, ax = plt.subplots()
scores.boxplot(vert=False, ax=ax)
ax.set_xlabel('Score')
# Raw string: '\s' is not a valid escape sequence in a normal string literal.
ax.set_title(r'Distribution of Scores for $N=100$, $\sigma = 1$')
# Lay out only after labels and title exist so they are taken into account.
fig.tight_layout()
fig.savefig('score_boxplot.pdf')
plt.show()


# Horizontal box plot of the RANKING distribution, one box per column of `ranks`.
fig, ax = plt.subplots()
ranks.boxplot(vert=False, ax=ax)
ax.set_title(r'Distribution of Rankings for $N=100$, $\sigma = 1$')
ax.set_xlabel('Ranking')
# One tick per possible rank (1..number of columns) instead of a hard-coded 20.
ax.set_xticks(list(range(1, ranks.shape[1] + 1)))
ax.tick_params(axis='x', labelrotation=90)
fig.tight_layout()
fig.savefig('rank_boxplot.pdf')
plt.show()

### Analysis Part II

In [None]:
# Replaces `results` with the array stored in 'results.npy' (read from the
# current working directory).
results = np.load('results.npy')

In [None]:
#### Use for BaseCase

# Label used by plot_res in the plot title and in the output file name.
qualifier = 'BaseCase'
# Raw string: '\s' is not a valid escape sequence in a normal string literal.
t2 = r'$N=100$, $\sigma = 1$'
def plot_res(_type, _qualifiertext, _arr, _n, _wgts, _site_list, _color='red'):
    """Box-plot the per-site score or rank distribution over the first ``_n`` simulations.

    Parameters
    ----------
    _type : str
        'score' to plot the weighted scores, 'rank' to plot their descending
        ranks (rank 1 = highest score).
    _qualifiertext : str
        Label placed in the plot title and in the output file name.
    _arr : ndarray
        3-D array indexed as ``_arr[site, criterion, simulation]``.
    _n : int
        Number of simulations (slices along the last axis) to use.
    _wgts : ndarray
        Weight matrix applied as ``np.dot(_arr[:, :, i], _wgts)``.
    _site_list : sequence of str
        Site names, one per row of ``_arr``; they become the column labels.
    _color : str, optional
        Colour of the boxes, whiskers and fliers (default 'red').

    Returns
    -------
    pandas.DataFrame
        One row per simulation and one column per site, holding scores or ranks.

    Raises
    ------
    ValueError
        If ``_type`` is neither 'score' nor 'rank'.

    Side effects: writes ``results/<_qualifiertext>_<_type>.png`` and shows the figure.
    """
    import os  # local import: only needed to create the output directory

    if _type not in ('score', 'rank'):
        raise ValueError("_type must be 'score' or 'rank', got %r" % (_type,))

    score_list = []
    for i in range(0, _n):
        site_scores = pd.Series(data=np.sum(np.dot(_arr[:, :, i], _wgts), axis=1), index=list(_site_list))
        if _type == 'rank':
            site_scores = site_scores.rank(ascending=False)
        score_list.append(site_scores)

    scores = pd.DataFrame(score_list)

    # One horizontal box per site.
    flierprops = dict(marker='.', markerfacecolor=_color, alpha=0.6, markersize=6, markeredgecolor='none')
    # return_type='dict' is required: by default DataFrame.boxplot returns the
    # Axes, which cannot be indexed with 'boxes' / 'whiskers' / 'fliers'.
    bp = scores.boxplot(vert=False, flierprops=flierprops, return_type='dict')
    for part in ('boxes', 'whiskers', 'fliers'):
        plt.setp(bp[part], color=_color)
    # Label the axis after what is actually plotted.
    plt.xlabel('Score' if _type == 'score' else 'Ranking')
    plt.title('Distribution of ' + _type + ' (' + _qualifiertext + ')')
    # Lay out only after labels and title exist so they are taken into account.
    plt.tight_layout()
    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)
    plt.savefig(r'results/' + _qualifiertext + r'_' + _type + '.png', transparent=True, dpi=1000)
    plt.show()

    return scores

# Base case: plot the score distribution first, then the rank distribution,
# keeping both returned frames.
basescores, baseranks = [
    plot_res(kind, qualifier, results, N, wgts, site_list)
    for kind in ('score', 'rank')
]

In [None]:
## Incentives

# Work on a copy of `results` so the original simulations stay untouched, and
# overwrite the Incentives criterion with the same value (5) for every site
# in every simulation. Open question from the author: use 0 or 10 instead?
qualifier = 'Incentives'
results_uni = np.copy(results)
results_uni[:, 0, :] = 5  # index 0 on the criterion axis is Incentives

In [None]:
#### Permitting

# Work on a copy of `results` so the original simulations stay untouched, and
# overwrite the Permitting criterion with the same value (5) for every site
# in every simulation. Open question from the author: use 0 or 10 instead?
qualifier = 'Permitting'
results_uni = np.copy(results)
results_uni[:, 1, :] = 5  # index 1 on the criterion axis is Permitting

In [None]:
#### Substation

# Work on a copy of `results` so the original simulations stay untouched, and
# overwrite the Substation criterion with the same value (5) for every site
# in every simulation. Open question from the author: use 0 or 10 instead?
qualifier = 'Substation'
results_uni = np.copy(results)
results_uni[:, 2, :] = 5  # index 2 on the criterion axis is Substation

In [None]:
#### Environmental

# Work on a copy of `results` so the original simulations stay untouched, and
# overwrite the Environmental criterion with the same value (5) for every site
# in every simulation. Open question from the author: use 0 or 10 instead?
qualifier = 'Environmental'
results_uni = np.copy(results)
results_uni[:, 3, :] = 5  # index 3 on the criterion axis is Enviro

In [None]:
# Run one of the above criteria cells first

def plot_res(_type, _qualifiertext, _arr, _n, _wgts, _site_list, _color='blue'):
    """Box-plot the per-site score or rank distribution over the first ``_n`` simulations.

    Parameters
    ----------
    _type : str
        'score' to plot the weighted scores, 'rank' to plot their descending
        ranks (rank 1 = highest score).
    _qualifiertext : str
        Label placed in the plot title and in the output file name.
    _arr : ndarray
        3-D array indexed as ``_arr[site, criterion, simulation]``.
    _n : int
        Number of simulations (slices along the last axis) to use.
    _wgts : ndarray
        Weight matrix applied as ``np.dot(_arr[:, :, i], _wgts)``.
    _site_list : sequence of str
        Site names, one per row of ``_arr``; they become the column labels.
    _color : str, optional
        Colour of the boxes, whiskers, fliers and medians (default 'blue').

    Returns
    -------
    pandas.DataFrame
        One row per simulation and one column per site, holding scores or ranks.

    Raises
    ------
    ValueError
        If ``_type`` is neither 'score' nor 'rank'.

    Side effects: writes ``results/<_qualifiertext>_<_type>.png`` and shows the figure.
    """
    import os  # local import: only needed to create the output directory

    if _type not in ('score', 'rank'):
        raise ValueError("_type must be 'score' or 'rank', got %r" % (_type,))

    score_list = []
    for i in range(0, _n):
        site_scores = pd.Series(data=np.sum(np.dot(_arr[:, :, i], _wgts), axis=1), index=list(_site_list))
        if _type == 'rank':
            site_scores = site_scores.rank(ascending=False)
        score_list.append(site_scores)
    scores = pd.DataFrame(score_list)

    # One horizontal box per site.
    flierprops = dict(marker='.', markerfacecolor=_color, alpha=0.6, markersize=6, markeredgecolor='none')
    # return_type='dict' is required: by default DataFrame.boxplot returns the
    # Axes, which cannot be indexed with 'boxes' / 'whiskers' / 'fliers' / 'medians'.
    bp = scores.boxplot(vert=False, flierprops=flierprops, return_type='dict')
    for part in ('boxes', 'whiskers', 'fliers', 'medians'):
        plt.setp(bp[part], color=_color)
    # Label the axis after what is actually plotted.
    plt.xlabel('Score' if _type == 'score' else 'Ranking')
    plt.title('Distribution of ' + _type + ' (' + _qualifiertext + ')')
    # Lay out only after labels and title exist so they are taken into account.
    plt.tight_layout()
    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)
    plt.savefig(r'results/' + _qualifiertext + r'_' + _type + '.png', transparent=True, dpi=1000)
    plt.show()

    return scores

# Plot the score distribution first, then the rank distribution, for the
# criterion override currently held in `results_uni` / `qualifier`.
scores, ranks = [
    plot_res(kind, qualifier, results_uni, N, wgts, site_list)
    for kind in ('score', 'rank')
]
    