# Ampule Unitizing

## [selecting_rows_based_on_conditions](https://chrisalbon.com/python/data_wrangling/pandas_select_rows_multiple_filters/)

---

In [1]:
import os, sys, time
from time import sleep
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import pyodbc
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Capture the current timestamp once and derive its ISO date (YYYY-MM-DD);
# the date string is what later cells use for row selection and file names.
todays_date = pd.Timestamp.now()
today_str = todays_date.strftime('%Y-%m-%d')

## *Prodflow* database in *SQL_SERVER*

In [3]:
# ODBC connection string for the ProdFlow database on SQL Server.
# Built from its individual `KEY=value;` settings; Trusted_Connection=yes
# means no user name or password is embedded in the string.
conn_str = "".join([
    r'DRIVER={ODBC Driver 17 for SQL Server};',
    r'SERVER=wtkngappflow1.is.agilent.net;',
    r'DATABASE=ProdFlow;',
    r'Trusted_Connection=yes;',
])

In [4]:
# Open the ODBC connection described by `conn_str` and create a cursor on it.
# On failure, print a short diagnostic (exception type, source file, line and
# message) and then re-raise, so that a "Run All" stops at this cell instead of
# continuing into cells that need `cnxn`.
try:
    cnxn = pyodbc.connect(conn_str)
    crsr = cnxn.cursor()
except Exception:
    # `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not intercepted here.
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    errorMessage = "{}\n{}\n\t\t{}\n".format(exc_type, exc_obj, exc_tb)
    print("\nTYPE : {}\nFILE : {}\nLINE : {}\nMESG : \n\n{}\n".format(
        exc_type, fname, exc_tb.tb_lineno, errorMessage))
    raise
else:
    print("FIN...")

FIN...


## tblProdflow

In [5]:
# Load the whole tblProdflow table into a DataFrame over the open connection,
# parsing AmpDate as a datetime column.
# On failure, print a short diagnostic (exception type, source file, line and
# message) and then re-raise, so that later cells never run against a missing
# or stale `df_tblProdflow`.
try:
    df_tblProdflow = pd.read_sql_query(sql='SELECT * FROM tblProdflow',
                                       parse_dates=['AmpDate'],
                                       con=cnxn)
except Exception:
    # `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not intercepted here.
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    errorMessage = "{}\n{}\n\t\t{}\n".format(exc_type, exc_obj, exc_tb)
    print("\nTYPE : {}\nFILE : {}\nLINE : {}\nMESG : \n\n{}\n".format(
        exc_type, fname, exc_tb.tb_lineno, errorMessage))
    raise
else:
    print("Operation Completed Successfully...")
    # DataFrame.info() writes its summary itself and returns None, so it is
    # called directly rather than wrapped in print() (which added a "None" line).
    df_tblProdflow.info()

Operation Completed Successfully...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108597 entries, 0 to 108596
Data columns (total 53 columns):
PfIDNo              108597 non-null int64
PfBatchID           108597 non-null object
ProductNo           108578 non-null object
OrdID               92465 non-null float64
QuoteNo             108561 non-null object
PfSentTo            108532 non-null object
PrepDate            107583 non-null datetime64[ns]
PrepVolume          108552 non-null float64
PrepUnit            108521 non-null object
PrepVessel          108518 non-null float64
PrepVBarcode        57912 non-null object
PrdSaleUnit         89377 non-null object
PrepMatrixNo        108522 non-null object
PrepMatrixLot       107655 non-null object
PrepInits           107414 non-null object
PrepMemo            61210 non-null object
PrepNotebookRef     48969 non-null object
BulkQCStatus        24 non-null object
BulkQCDate          74962 non-null datetime64[ns]
BulkPassFail        74068 no

In [6]:
# Move the AmpDate column into the index so rows can be selected by date.
df_tblProdflow = df_tblProdflow.set_index(keys=['AmpDate'])

### Rename `AmpBulkRemain` column in table

In [7]:
# Relabel the AmpBulkRemain column as 'Daily Yield'.
df_tblProdflow = df_tblProdflow.rename(columns={'AmpBulkRemain': 'Daily Yield'})

### Get only rows from `TODAY`

In [8]:
# Keep only the rows whose AmpDate index falls on today's date.
# The selection uses an explicit boolean mask on the day-normalized index:
# `df[<date string>]` is treated as a column lookup by current pandas (it raises
# KeyError), whereas the mask always returns a DataFrame - empty if nothing
# was recorded today - regardless of index ordering or resolution.
df_todaysAmples = df_tblProdflow.loc[
    df_tblProdflow.index.normalize() == pd.Timestamp(today_str)
]

In [9]:
# Display today's rows transposed (one table field per output row).
df_todaysAmples.transpose()

AmpDate,2020-02-26,2020-02-26.1,2020-02-26.2,2020-02-26.3,2020-02-26.4,2020-02-26.5,2020-02-26.6,2020-02-26.7,2020-02-26.8,2020-02-26.9,2020-02-26.10,2020-02-26.11,2020-02-26.12,2020-02-26.13,2020-02-26.14,2020-02-26.15
PfIDNo,152970,153098,153161,153190,153167,153172,153178,153179,153180,153195,153200,153217,153212,153218,153132,153203
PfBatchID,NAB00813,NAB00941,NAB01004,NAB01033,NAB01010,NAB01015,NAB01021,NAB01022,NAB01023,NAB01038,NAB01043,NAB01060,NAB01055,NAB01061,NAB00975,NAB01046
ProductNo,BULK-ICC-015,AGI-P03-1,QRR010934,AGI-T04-8,AGI-P03-3,AGI-P03-9,AGI-P03-7,AGI-P03-8,DWM-580-1,AGI-T04-9,AGI-T03-1,CS1100,ICUS-7374,ICUS-0000106,G1946-85021-1,QRR010933
OrdID,107735,107500,107407,107519,107503,107507,107505,107506,107740,107520,107510,107856,107822,107880,107596,107549
QuoteNo,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,Stock Item,3154284,Stock Item,Stock Item
PfSentTo,done,done,done,done,done,done,done,done,QC,done,done,done,done,ampuling,QC,ampuling
PrepDate,2020-02-13 00:00:00,2020-02-19 00:00:00,2020-02-24 00:00:00,2020-02-25 00:00:00,2020-02-24 00:00:00,2020-02-24 00:00:00,2020-02-24 00:00:00,2020-02-24 00:00:00,2020-02-24 00:00:00,2020-02-25 00:00:00,2020-02-25 00:00:00,2020-02-26 00:00:00,2020-02-26 00:00:00,2020-02-26 00:00:00,2020-02-20 00:00:00,2020-02-26 00:00:00
PrepVolume,1000,200,2e+06,50,200,200,200,200,500,50,50,100,200,4000,6000,700000
PrepUnit,mL,mL,g,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,mL,g
PrepVessel,1,1,4,1,1,1,1,1,1,1,1,1,1,1,1,4


### Create EMPTY DataFrame to hold Metrics

In [10]:
# Start from an empty frame; the following cells add one metric column at a time.
df_Metrics = pd.DataFrame()

In [11]:
# Copy PfIDNo from today's rows into the metrics frame as column "ID".
df_Metrics = df_Metrics.assign(ID=df_todaysAmples['PfIDNo'])

In [12]:
# Carry the batch identifier (PfBatchID) over unchanged.
df_Metrics['PfBatchID'] = df_todaysAmples.loc[:, 'PfBatchID']

In [13]:
# Carry the AmpNumberGood column (number of good amps) over unchanged.
df_Metrics = df_Metrics.assign(AmpNumberGood=df_todaysAmples['AmpNumberGood'])

In [14]:
# Parse AmpTimeIn / AmpTimeOut as datetimes and store them as Time-In / Time-Out
# so the two columns can be subtracted from each other.
df_Metrics = df_Metrics.assign(**{
    'Time-In': pd.to_datetime(df_todaysAmples['AmpTimeIn']),
    'Time-Out': pd.to_datetime(df_todaysAmples['AmpTimeOut']),
})

In [15]:
# Elapsed time per row: Time-Out minus Time-In.
df_Metrics['time_diff'] = df_Metrics['Time-Out'].sub(df_Metrics['Time-In'])

In [16]:
# Replace the full timestamps in Time-In / Time-Out with time-of-day values
# (datetime.time objects, not timedeltas); time_diff has already been computed
# from the datetime versions.
# The source columns are coerced with pd.to_datetime first, the same way they
# were read when the datetime versions were built, so `.dt` does not depend on
# the raw columns already having a datetime dtype.
df_Metrics['Time-In'] = pd.to_datetime(df_todaysAmples['AmpTimeIn']).dt.time
df_Metrics['Time-Out'] = pd.to_datetime(df_todaysAmples['AmpTimeOut']).dt.time

In [17]:
# Carry the AmpRetains column over unchanged.
df_Metrics = df_Metrics.assign(AmpRetains=df_todaysAmples.loc[:, "AmpRetains"])

In [18]:
# PrepVolume from today's rows becomes the "Volume" metric.
df_Metrics["Volume"] = df_todaysAmples.loc[:, "PrepVolume"]

In [19]:
# Replace every missing value in the metrics frame with 0 before the yield is computed.
# NOTE(review): the fill applies to all columns present at this point, including the
# non-numeric ones (batch ID, times) - confirm that 0 is the intended placeholder there.
df_Metrics = df_Metrics.fillna(value=0)

  


In [20]:
# "Daily Yield" is the row-wise product of Volume and AmpRetains.
df_Metrics["Daily Yield"] = df_Metrics["Volume"].mul(df_Metrics['AmpRetains'])

In [21]:
# Attach the PrepUnit column as "Unit" (added after the fill, so missing units are left as-is).
df_Metrics = df_Metrics.assign(Unit=df_todaysAmples["PrepUnit"])

In [22]:
# Display the assembled metrics table for a visual check before it is exported.
df_Metrics

Unnamed: 0_level_0,ID,PfBatchID,AmpNumberGood,Time-In,Time-Out,time_diff,AmpRetains,Volume,Daily Yield,Unit
AmpDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-02-26,152970,NAB00813,950.0,00:00:00,00:01:00,00:01:00,0.0,1000.0,0.0,mL
2020-02-26,153098,NAB00941,149.0,12:50:00,13:25:00,00:35:00,0.0,200.0,0.0,mL
2020-02-26,153161,NAB01004,100.0,07:45:00,10:30:00,02:45:00,0.0,2000000.0,0.0,g
2020-02-26,153190,NAB01033,32.0,09:00:00,09:10:00,00:10:00,0.0,50.0,0.0,mL
2020-02-26,153167,NAB01010,151.0,10:25:00,11:10:00,00:45:00,0.0,200.0,0.0,mL
2020-02-26,153172,NAB01015,150.0,11:10:00,11:45:00,00:35:00,0.0,200.0,0.0,mL
2020-02-26,153178,NAB01021,146.0,08:15:00,08:45:00,00:30:00,0.0,200.0,0.0,mL
2020-02-26,153179,NAB01022,151.0,13:50:00,14:25:00,00:35:00,0.0,200.0,0.0,mL
2020-02-26,153180,NAB01023,345.0,07:30:00,09:20:00,01:50:00,5.0,500.0,2500.0,mL
2020-02-26,153195,NAB01038,32.0,09:20:00,09:30:00,00:10:00,0.0,50.0,0.0,mL


In [23]:
# Export the metrics, index included, to a date-stamped Excel workbook.
xlsx_path = "c:/data/outbound/Metrics/" + today_str + "_ampule_METRICS.xlsx"
df_Metrics.to_excel(xlsx_path, index=True)

In [25]:
# Export the same metrics, index included, to a date-stamped CSV file.
csv_path = "c:/data/outbound/Metrics/" + today_str + "_ampule_METRICS.csv"
df_Metrics.to_csv(csv_path, index=True)