# Ampule Unitizing

## Reference: [Selecting rows based on conditions](https://chrisalbon.com/python/data_wrangling/pandas_select_rows_multiple_filters/)

---

In [1]:
import os, sys, time
from time import sleep
from pathlib import Path
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import pyodbc
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Capture the run timestamp once; the "today" filter and the output file
# names further down are derived from it.
todays_date = pd.Timestamp.now()
# Calendar date of the run as an ISO string (YYYY-MM-DD).
today_str = todays_date.strftime("%Y-%m-%d")

## *Prodflow* database in *SQL_SERVER*

In [3]:
# Assemble the ODBC connection string for the ProdFlow database on SQL Server.
# Each setting is rendered as "KEY=value;" and the pieces are concatenated in
# order; no user name or password is embedded in the string.
conn_str = "".join(
    f"{setting}={setting_value};"
    for setting, setting_value in (
        ("DRIVER", "{ODBC Driver 17 for SQL Server}"),
        ("SERVER", "wtkngappflow1.is.agilent.net"),
        ("DATABASE", "ProdFlow"),
        ("Trusted_Connection", "yes"),
    )
)

In [4]:
# Open the ProdFlow connection (`cnxn`) and a cursor (`crsr`) that the cells
# below rely on.
try:
    cnxn = pyodbc.connect(conn_str)
    crsr = cnxn.cursor()
except Exception as exc:
    # `Exception` (not a bare `except:`) so KeyboardInterrupt / SystemExit
    # are never swallowed by this handler.
    exc_tb = exc.__traceback__
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print("\nTYPE : " + str(type(exc)) +
          "\nFILE : " + str(fname) +
          "\nLINE : " + str(exc_tb.tb_lineno) +
          "\nMESG : " + str(exc) + "\n")
    # Re-raise after reporting: without `cnxn` every later cell would fail
    # with a misleading NameError, so stop the run at the real cause.
    raise
else:
    print("FIN...")

FIN...


## tblProdflow

In [5]:
# Load the whole tblProdflow table into a DataFrame over the open connection,
# parsing the AmpDate column as datetimes.
try:
    df_tblProdflow = pd.read_sql_query(sql='SELECT * FROM tblProdflow',
                                       parse_dates=['AmpDate'],
                                       con=cnxn)
except Exception as exc:
    # `Exception` (not a bare `except:`) so KeyboardInterrupt / SystemExit
    # are never swallowed by this handler.
    exc_tb = exc.__traceback__
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print("\nTYPE : " + str(type(exc)) +
          "\nFILE : " + str(fname) +
          "\nLINE : " + str(exc_tb.tb_lineno) +
          "\nMESG : " + str(exc) + "\n")
    # Re-raise after reporting: the cells below cannot run without
    # `df_tblProdflow`, so stop at the real cause instead of a later NameError.
    raise
else:
    print("Operation Completed Successfully...")
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called directly; wrapping it in print() emitted a trailing "None".
    df_tblProdflow.info()

Operation Completed Successfully...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108702 entries, 0 to 108701
Data columns (total 53 columns):
PfIDNo              108702 non-null int64
PfBatchID           108702 non-null object
ProductNo           108683 non-null object
OrdID               92570 non-null float64
QuoteNo             108666 non-null object
PfSentTo            108637 non-null object
PrepDate            107688 non-null datetime64[ns]
PrepVolume          108657 non-null float64
PrepUnit            108626 non-null object
PrepVessel          108623 non-null float64
PrepVBarcode        58015 non-null object
PrdSaleUnit         89469 non-null object
PrepMatrixNo        108627 non-null object
PrepMatrixLot       107758 non-null object
PrepInits           107517 non-null object
PrepMemo            61285 non-null object
PrepNotebookRef     48969 non-null object
BulkQCStatus        24 non-null object
BulkQCDate          75041 non-null datetime64[ns]
BulkPassFail        74131 no

In [6]:
# Index the table by AmpDate so that rows can be selected by date.
# The guard makes this cell safe to re-run: once AmpDate has become the index
# it is no longer a column, and an unguarded set_index would raise KeyError.
if df_tblProdflow.index.name != 'AmpDate':
    df_tblProdflow = df_tblProdflow.set_index('AmpDate')

### Rename `AmpBulkRemain` column in table

In [7]:
# Relabel the AmpBulkRemain column as 'Daily Yield', modifying the table in place.
df_tblProdflow.rename({'AmpBulkRemain': 'Daily Yield'}, axis='columns', inplace=True)

### Get only rows from `TODAY`

In [8]:
# Keep only the rows whose AmpDate (the index) falls on today's calendar date.
# An explicit boolean mask is used instead of `df_tblProdflow[today_str]`:
# selecting rows with a bare date string via `df[...]` was deprecated in
# pandas 1.2 and removed in 2.0, where it is treated as a column lookup.
# The mask also works on an unsorted index, ignores NaT entries, and yields an
# empty frame (rather than an error) when nothing was recorded today.
df_todaysAmples = df_tblProdflow.loc[
    df_tblProdflow.index.normalize() == pd.Timestamp(today_str)
]

In [9]:
# Display today's rows transposed (fields as rows, one column per record) so
# that every field of the wide table is visible.
df_todaysAmples.transpose()

AmpDate,2020-03-02,2020-03-02.1,2020-03-02.2,2020-03-02.3,2020-03-02.4,2020-03-02.5,2020-03-02.6,2020-03-02.7,2020-03-02.8,2020-03-02.9,2020-03-02.10,2020-03-02.11,2020-03-02.12,2020-03-02.13,2020-03-02.14,2020-03-02.15
PfIDNo,153276,153292,153297,153300,153305,153294,153296,153298,153299,153304,153113,153223,153263,153301,153306,153313
PfBatchID,NAB01119,NAB01135,NAB01140,NAB01143,NAB01148,NAB01137,NAB01139,NAB01141,NAB01142,NAB01147,NAB00956,NAB01066,NAB01106,NAB01144,NAB01149,NAB01156
ProductNo,STS-200-1,CUS-14471,CUS-00002635,CUS-14195,CUS-11251,CUS-00001047,CUS-14472,CUS-6906,CUS-00002652,CUS-9690,ICP-480,PP-150-1,ICUS-6069,CUS-7434,CUS-00000449,CUS-11008
OrdID,107654,107968,107984,107992,107993,107958,107969,107970,108052,107972,107846,107645,107890,107867,107924,108019
QuoteNo,Stock Item,Stock Item,3153720,Stock Item,Stock Item,3154222,Stock Item,Stock Item,3155438,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,021020-065,Stock Item
PfSentTo,QC,QC,QC,QC,QC,QC,QC,QC,QC,QC,ampuling,QC,QC,QC,QC,ampuling
PrepDate,2020-02-28 00:00:00,2020-02-28 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-02-28 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-02-20 00:00:00,2020-02-26 00:00:00,2020-02-27 00:00:00,2020-02-29 00:00:00,2020-02-29 00:00:00,2020-03-02 00:00:00
PrepVolume,500,100,100,25,50,50,50,100,100,50,4000,500,1000,100,50,50
PrepUnit,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL
PrepVessel,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


### Create EMPTY DataFrame to hold Metrics

In [10]:
# Start from an empty frame; the metric columns are added one by one below.
df_Metrics = pd.DataFrame()

In [11]:
# "ID" column: the PfIDNo value of each of today's records.
df_Metrics["ID"] = df_todaysAmples.loc[:, 'PfIDNo']

In [12]:
# "PfBatchID" column: copied unchanged from today's records.
df_Metrics['PfBatchID'] = df_todaysAmples.loc[:, 'PfBatchID']

In [13]:
# "GoodNo" column: taken from AmpNumberGood (number of good amps per record).
df_Metrics['GoodNo'] = df_todaysAmples.loc[:, 'AmpNumberGood']

In [14]:
# Copy the time-in / time-out stamps into the metrics frame as datetimes so
# that they can be subtracted from one another in the next step.
for metric_col, source_col in (('Time-In', 'AmpTimeIn'), ('Time-Out', 'AmpTimeOut')):
    df_Metrics[metric_col] = pd.to_datetime(df_todaysAmples[source_col])

In [15]:
# Elapsed time per record: time-out minus time-in.
df_Metrics['elapsed_time'] = df_Metrics['Time-Out'].sub(df_Metrics['Time-In'])

In [16]:
# Now that elapsed_time has been computed, overwrite Time-In / Time-Out with
# just the time-of-day part (datetime.time objects) of the source stamps.
for metric_col, source_col in (('Time-In', 'AmpTimeIn'), ('Time-Out', 'AmpTimeOut')):
    df_Metrics[metric_col] = df_todaysAmples[source_col].dt.time

In [17]:
# "RetainsNo" column: copied from AmpRetains.
df_Metrics["RetainsNo"] = df_todaysAmples.loc[:, "AmpRetains"]

In [18]:
# "Volume" column: copied from PrepVolume.
df_Metrics["Volume"] = df_todaysAmples.loc[:, "PrepVolume"]

In [19]:
# Replace missing values with 0 in the numeric count/volume columns only.
# A blanket df_Metrics.fillna(0) also wrote the integer 0 into the
# time-of-day, elapsed-time and batch-ID columns, where 0 is not a valid
# value; those columns now keep their missing markers.
numeric_cols = ['GoodNo', 'RetainsNo', 'Volume']
df_Metrics[numeric_cols] = df_Metrics[numeric_cols].fillna(value=0)

  


In [20]:
# "Daily Yield" column: Volume multiplied element-wise by RetainsNo.
df_Metrics["Daily Yield"] = df_Metrics["Volume"].mul(df_Metrics['RetainsNo'])

In [21]:
# "Unit" column: copied from PrepUnit.
df_Metrics["Unit"] = df_todaysAmples.loc[:, "PrepUnit"]

In [22]:
# Display the assembled metrics table for a visual check.
df_Metrics

Unnamed: 0_level_0,ID,PfBatchID,GoodNo,Time-In,Time-Out,elapsed_time,RetainsNo,Volume,Daily Yield,Unit
AmpDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-03-02,153276,NAB01119,367.0,12:30:00,14:00:00,01:30:00,5.0,500.0,2500.0,mL
2020-03-02,153292,NAB01135,5.0,07:25:00,07:30:00,00:05:00,0.0,100.0,0.0,mL
2020-03-02,153297,NAB01140,2.0,08:25:00,08:30:00,00:05:00,0.0,100.0,0.0,mL
2020-03-02,153300,NAB01143,4.0,11:40:00,11:45:00,00:05:00,0.0,25.0,0.0,mL
2020-03-02,153305,NAB01148,2.0,07:40:00,07:45:00,00:05:00,0.0,50.0,0.0,mL
2020-03-02,153294,NAB01137,10.0,11:45:00,11:50:00,00:05:00,0.0,50.0,0.0,mL
2020-03-02,153296,NAB01139,5.0,11:55:00,12:00:00,00:05:00,0.0,50.0,0.0,mL
2020-03-02,153298,NAB01141,1.0,08:10:00,08:15:00,00:05:00,0.0,100.0,0.0,mL
2020-03-02,153299,NAB01142,50.0,11:10:00,11:30:00,00:20:00,0.0,100.0,0.0,mL
2020-03-02,153304,NAB01147,10.0,12:00:00,12:10:00,00:10:00,0.0,50.0,0.0,mL


In [23]:
# Write the metrics to a date-stamped Excel workbook; index=True keeps the
# frame's index as the first column of the sheet.
df_Metrics.to_excel(f"c:/data/outbound/Metrics/{today_str}_ampule_METRICS.xlsx", index=True)

In [24]:
# Write the same metrics to a date-stamped CSV file; index=True keeps the
# frame's index as the first column.
df_Metrics.to_csv(f"c:/data/outbound/Metrics/{today_str}_ampule_METRICS.csv", index=True)