In [28]:
import pandas as pd
import numpy as num
import matplotlib.pyplot as plt
import gurobipy as gp
from   gurobipy import GRB
import time
import datetime
import math
import csv
import os
import warnings
import re
warnings.filterwarnings('ignore')
import Data_Functions as f

In [29]:
# Working directory of the kernel at the time this cell runs.
cd = os.getcwd()

# Folder holding the input workbooks. The loading cell concatenates this
# string directly with each file name, so the trailing slash matters.
directory = 'C:/Users/bount/Colonial-Storage/data_new_2/'

# Every entry of that folder is handed to the loading cell as a workbook name.
filenames = os.listdir(path=directory)

In [30]:
# Read "Sheet1" of every workbook listed in `filenames` and stack the rows
# into a single frame, `df_1`.
dfs = []
for file in filenames:
    # os.path.join tolerates `directory` with or without a trailing separator.
    df = pd.read_excel(os.path.join(directory, file), header=0, sheet_name="Sheet1")
    # Remember the source workbook; the "Pick" step groups on it later.
    df['file']  = file
    df['Start'] = pd.to_datetime(df['Start Date/Time'])
    df['End']   = pd.to_datetime(df['End Date/Time'])
    # Row key of the form Code//Start//End//Line. The vectorised .dt.strftime
    # replaces a per-row Python lambda and yields a missing value (instead of
    # raising) when a timestamp is NaT.
    df['Identifier'] = (
        df['Code']
        + '//' + df['Start'].dt.strftime('%Y-%m-%d %H:%M:%S')
        + '//' + df['End'].dt.strftime('%Y-%m-%d %H:%M:%S')
        + '//' + df['Line'].astype(str)
    )
    dfs.append(df)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# every workbook contributes its own 0..k labels, so labels repeat across
# files and any later label-based alignment becomes ambiguous.
df_1 = pd.concat(dfs, ignore_index=True)

In [31]:
#-------------------------------------------------------------------------
# Break down the code
#
# Split Code into its three dash-separated components.
temp = df_1['Code'].str.extract(r'(\w+)-(\w+)-(\w+)')
temp.columns = ['Client', 'Product', 'Cycle']
# Drop copies of these columns left behind by an earlier run of this cell
# before attaching the fresh ones. Without this, re-running the cell appends a
# second Client/Product/Cycle set and df_1['Cycle'] stops being a single column.
df_1 = pd.concat([df_1.drop(columns=temp.columns, errors='ignore'), temp], axis=1)

#-------------------------------------------------------------------------
# Product
#
def product(df):
    """Map one row's 'Product Grade' to a product label.

    Grades 'A3'/'A4' become 'A', grades 'D3'/'D4' become 'D'; any other
    grade is returned unchanged. `df` is a single row (anything that can be
    indexed with 'Product Grade').
    """
    grade = df['Product Grade']
    if grade in ('A3', 'A4'):
        return 'A'
    if grade in ('D3', 'D4'):
        return 'D'
    return grade
# Apply the grade mapping row by row and store the result as strings.
df_1['Product'] = df_1.apply(product, axis='columns').astype(str)
    
#-------------------------------------------------------------------------
# Line
#    
def line(df):
    """Return the zero-padded label for lines 1 and 2, else the raw value.

    A 'Line' equal to 1 becomes '01' and a 'Line' equal to 2 becomes '02';
    every other value is passed through untouched. `df` is a single row.
    """
    line_no = df['Line']
    for number, label in ((1, '01'), (2, '02')):
        if line_no in (number,):
            return label
    return line_no
# Normalise the line label row by row and store the result as strings.
df_1['Line'] = df_1.apply(line, axis='columns').astype(str)

#-------------------------------------------------------------------------
# Type ('In' for lines '01'/'02', otherwise 'Out')
#
def typ(df):
    """Classify one row as 'In' when its 'Line' is '01' or '02', else 'Out'."""
    return 'In' if df['Line'] in ('01', '02') else 'Out'
# Label every row 'In' or 'Out' from its (already normalised) Line value.
df_1['Type'] = df_1.apply(typ, axis='columns')

#-------------------------------------------------------------------------
# Exclude
#
# Derive each row's duration (as a timedelta and in hours) and its volume
# rate per hour. The lambdas run in order, so each can read the column
# created by the one before it.
df_1 = df_1.assign(
    Time_difference=lambda rows: rows['End'] - rows['Start'],
    Time_in_hours=lambda rows: rows['Time_difference'] / pd.Timedelta(hours=1),
    Vol_per_Hr=lambda rows: rows['Volume'] / rows['Time_in_hours'],
)
def exclude(df, threshold=50):
    """Flag one row for exclusion based on its hourly volume rate.

    Parameters
    ----------
    df : a single row (anything indexable with 'Vol_per_Hr').
    threshold : rate at or above which the row is flagged. Defaults to 50,
        the value previously hard-coded here, so existing calls such as
        ``df_1.apply(exclude, axis=1)`` behave exactly as before.

    Returns
    -------
    int
        1 when ``df['Vol_per_Hr'] >= threshold``, otherwise 0. A NaN rate
        compares False and therefore yields 0.
    """
    return 1 if df['Vol_per_Hr'] >= threshold else 0
# Store the 0/1 exclusion flag computed row by row.
df_1['Exclude'] = df_1.apply(exclude, axis='columns')

#-------------------------------------------------------------------------
# Misc
#
# Give the 'Vol Tank' column a space-free name.
df_1 = df_1.rename({'Vol Tank': 'Volume_Tank'}, axis='columns')

In [32]:
#-------------------------------------------------------------------------
# Exclude rows flagged as errors (Exclude == 1)
# 
# Report how many rows carry the exclusion flag, then keep only the others.
n_flagged = sum(df_1['Exclude'])
print('We are excluding ' + str(n_flagged) + ' rows.')
df_1 = df_1.loc[df_1['Exclude'] == 0]

We are excluding 52 rows.


In [33]:
#-------------------------------------------------------------------------
# Create the "Pick" column
# 
#df_1['Rank'] = df_1.groupby('Identifier')['file'].rank(ascending = False)
#df_1[df_1['Rank'] > 1]
# Calculate the Volume for each cycle/line/file combination
# df_2         = df_1.groupby(['Cycle', 'Line', 'file']).agg({'Volume': ['sum']}).reset_index()
# df_2.columns = [i[0]+"_"+i[1] for i in df_2.columns]
# df_2         = df_2.rename(columns={'Cycle_': 'Cycle', 'Line_': 'Line', 'file_': 'file', 'Volume_sum': 'Volume'})
# # Rank the entries by Volume with each cycle/line segment
# df_2['Rank'] = df_2.groupby(['Cycle', 'Line'])['Volume'].rank(ascending = False)

# Total Volume per (Cycle, file). as_index=False keeps Cycle/file as ordinary
# columns, so no MultiIndex flattening or renaming is needed.
df_2 = df_1.groupby(['Cycle', 'file'], as_index=False)['Volume'].sum()

# Rank the files inside each cycle by total volume, largest first.
# method='first' guarantees exactly one Rank == 1 per cycle. The default
# ('average') gives two tied files rank 1.5 each, so neither would be picked
# and the whole cycle would silently disappear downstream. Ties are broken by
# row order, i.e. by file name, because groupby sorts its keys.
df_2['Rank'] = df_2.groupby('Cycle')['Volume'].rank(method='first', ascending=False)

# Keep only the top file of each cycle and tag it with Pick = 1. Building the
# result with .loc/.assign avoids assigning into a filtered slice.
df_2 = df_2.loc[df_2['Rank'] == 1, ['Cycle', 'file']].assign(Pick=1)

In [34]:
# Attach the Pick flag to every ticket row via its (Cycle, file) pair; rows
# whose pair was not picked end up with a missing Pick.
df_3 = df_1.merge(df_2, how='outer', on=['Cycle', 'file'])

# Keep only the rows that belong to a picked (Cycle, file) pair.
is_picked  = df_3['Pick'] == 1
df_4       = df_3[is_picked]
df_tickets = df_4

In [35]:
#---------------------------------------------------------------------------
# Remove rows
#
# nan_values = df_tickets.isna()
# index = df_tickets.loc[nan_values['Tank'] == True].index
# df_tickets = df_tickets.drop(index=index)

#---------------------------------------------------------------------------
# Audit and remove values
#
def audit(df):
    """Print the distinct Cycle, Tank and Product values found in `df`.

    For each of the three attributes a banner line is printed, followed by
    the list of its unique values (in set order, i.e. not sorted).
    """
    sections = (
        ("***Cycle***", "Cycle"),
        ("***Tanks***", "Tank"),
        ("***Products***", "Product"),
    )
    for banner, column in sections:
        print(banner)
        print(list(set(getattr(df, column))))
    
# Print the distinct cycles, tanks and products of the picked tickets.
audit(df=df_tickets)

***Cycle***
['611', '542', '602', '551', '531', '571', '603', '592', '562', '593', '601', '552', '582', '573', '581', '604', '561', '541', '563', '591', '59H']
***Tanks***
[310.0, 311.0, 312.0, 313.0, 314.0, 315.0, 316.0, 317.0, 330.0, 331.0, 332.0, 333.0, 334.0, 336.0, 337.0, 338.0, 339.0, 350.0, 351.0, 352.0, 353.0, 354.0, 361.0, 363.0, 370.0, 371.0, 372.0, 373.0, 374.0, 375.0, 376.0]
***Products***
['M3', 'M4', '54', 'V3', '96', 'V4', 'D', 'A', '62']


In [36]:
# Hand the picked tickets to data_volume from the project-local Data_Functions
# module (imported as `f`). The next cell reads the 'VolIn', 'VolOut' and
# 'VolExist' entries of the result.
# NOTE(review): what data_volume computes is not visible here — confirm
# against Data_Functions.
ret = f.data_volume(df_tickets)

In [37]:
def _write_dict_csv(path, mapping):
    """Write every (key, value) pair of `mapping` as one two-column CSV row.

    The file at `path` is overwritten. The handle is deliberately NOT named
    `f`: in this notebook `f` is the Data_Functions module alias, and binding
    a file object to it would break any later (or repeated) `f.data_volume(...)`
    call.
    """
    with open(path, 'w', newline="") as handle:
        csv.writer(handle).writerows(mapping.items())


# Make sure the output folder exists so open() does not fail on a fresh checkout.
os.makedirs('results', exist_ok=True)

VolIn = ret['VolIn']
_write_dict_csv('results/VolIn_2.csv', VolIn)

VolOut = ret['VolOut']
_write_dict_csv('results/VolOut_2.csv', VolOut)

VolExist = ret['VolExist']
_write_dict_csv('results/VolExist_2.csv', VolExist)

In [50]:
# df = df_3[(df_3['Cycle'] == '551') & (df_3['Pick'] == 1)]
# #df.groupby(['Type', 'Product']).agg({'Volume': ['sum']})
# df.to_csv("results/temp.csv")