In [1]:
import pandas as pd
import pyodbc
import sqlalchemy.types
from sqlalchemy import create_engine
from urllib import parse
from timeit import default_timer as timer
from datetime import datetime


### Define Some Date Functions to Deal with AS400 Julian Dates


In [2]:
def date_to_julian(date_obj):
    """
    Convert a Python date/datetime to AS400 Julian format (YYDDD).

    Format: YYDDD where YY=year, DDD=day of year
    Example: January 1, 2026 -> 26001

    Args:
        date_obj: datetime.date, datetime.datetime, or string in 'YYYY-MM-DD' format

    Returns:
        int: Julian date in YYDDD format
    """
    if isinstance(date_obj, str):
        date_obj = datetime.strptime(date_obj, '%Y-%m-%d').date()
    elif isinstance(date_obj, datetime):
        date_obj = date_obj.date()

    year = date_obj.year
    year_digits = year % 100
    day_of_year = date_obj.timetuple().tm_yday

    julian = year_digits * 1000 + day_of_year
    return julian


def julian_to_date(julian_date):
    """
    Convert 5-digit Julian date (YYDDD) to yyyy-mm-dd format.
    
    Format: YYDDD where YY=year (assumes 2000s), DDD=day of year
    Example: 26001 -> 2026-01-01
    
    Args:
        julian_date: int or str, 5-digit Julian date
    
    Returns:
        str: Date in 'YYYY-MM-DD' format
    """
    from datetime import timedelta
    
    julian_str = str(int(julian_date)).zfill(5)
    year_digits = int(julian_str[:2])
    day_of_year = int(julian_str[2:])
    
    # Assume years 00-99 map to 2000-2099
    year = 2000 + year_digits
    
    # Start from January 1 of that year and add days
    base_date = datetime(year, 1, 1).date()
    target_date = base_date + timedelta(days=day_of_year - 1)
    
    return target_date.strftime('%Y-%m-%d')

### Define Some Global Constants

In [3]:
current_year = datetime.now().year
start_date = datetime(current_year, 1, 1)
stop_date = datetime.today()

SCRAP_START = date_to_julian(start_date)
SCRAP_STOP = date_to_julian(stop_date)


### Define Database Connections

In [4]:
# Define Database Connection for PROD

CONNAS400 = """
Driver={iSeries Access ODBC Driver};
system=10.143.12.10;
Server=AS400;
Database=PROD;
UID=SMY;
PWD=SMY;
"""

# Define Database Connection for ccsdta

CONNAS400_CCSDTA = """
Driver={iSeries Access ODBC Driver};
system=10.143.12.10;
Server=AS400;
Database=CCSDTA;
UID=SMY;
PWD=SMY;
"""

# Setup Database Connection to SQL Server

server = 'tn-sql'
database = 'autodata'
driver = 'ODBC+Driver+17+for+SQL+Server'
user = 'production'
pwd = parse.quote_plus("Auto@matics")
port = '1433'
database_conn = f'mssql+pyodbc://{user}:{pwd}@{server}:{port}/{database}?driver={driver}'
# Make Connection
engine = create_engine(database_conn)
conn_sql = engine.connect()

### Setup Columns and Data Types for Dataframes

In [5]:
SCRAP_COLUMNS = ["ITMID", "ITMDESC", "PRODCODE", "INVCLASS", "TXNCD", "REASON", "PLT", "TXNQTY", "UOM", "ITMCOST",
                 "FRZSTDCST", "SCRAP_COST", "TNREF1", "TXNSDT", "DATE1", "USERID"]
SCRAP_DTYPE = {
    "ITMID": str,
    "ITMDESC": str,
    "PRODCODE": str,
    "INVCLASS": str,
    "TXNCD": str,
    "REASON": str,
    "PLT": str,
    "TXNQTY": float,
    "UOM": str,
    "ITMCOST": float,
    "FRZSTDCST": float,
    "SCRAP_COST": float,
    "TNREF1": str,
    "TXNSDT": float,
    "DATE1": str,
    "USERID": str
}

In [6]:

sql_inv = """
    SELECT
    b.plt,
    y.itmid,
    b.qty,
    y.itmdesc,
    SUBSTR (Altdesc, 15, 1) Class
FROM
    CCSDTA.DCSCIM y,
    CCSDTA.DMFCMAR x,
    CCSDTA.DCSILM b
WHERE
    x.itmid = y.itmid
    AND x.itmid = b.itmid
    AND x.plt = b.plt
    AND b.plt = '09'
    AND qty <> 0
    AND x.COSTID = 'FRZ'
    AND x.plt NOT IN ('53', '54', '55', '56', '59')
"""
sql_scrap = f"""
SELECT
        x.itmid,
        y.itmdesc,
        substr(z.altdesc,12,1) PRODCODE,
        substr(z.altdesc,15,1) INVCLASS,
        txncd,
        reason,
        x.plt,
        txnqty,
        x.uom,
        x.itmcost,
        y.frzstdcst,
        ((y.frzstdcst/1000) * txnqty) AS SCRAP_COST,
        tnref1,
        txnsdt,
        userid
FROM
        CCSDTA.DCSHST x,
        CCSDTA.DCSDIM y,
        CCSDTA.DCSCIM z
WHERE
        x.plt = y.plt
        AND x.itmid = y.itmid
        AND x.itmid = z.itmid
        AND txnsdt > {SCRAP_START}
        AND txnsdt < {SCRAP_STOP}
        AND y.plt = '09'
        AND reason not in ('790', '799', 'ERR', 'OP')
        AND substr(z.altdesc,12,1) not in ('7', 'C', 'D')
        AND tnref1 not in ('PIV0099', 'RMAD303522', 'RMAD303565')
        AND txncd in ('12', '13')
"""

In [7]:

def pull_data(conn,qry):
    # Connection with error handling and connection management
    result = []
    start = 0
    msg = ""  # Initialize msg
    dbcnxn = None  # Initialize dbcnxn
    try:
        dbcnxn = pyodbc.connect(conn, timeout=30)
        cursor = dbcnxn.cursor()
        start = timer()

        try:
            cursor.execute(qry)
            result = cursor.fetchall()
            msg = str(len(result)) + " Records Processed From Table"
        except Exception as e:
            msg = conn + ' Query Failed: ' + str(e)
            print(msg)
    except pyodbc.Error as e:
        msg = conn + ' Connection Failed: ' + str(e)
        print(msg)
    finally:
        if msg:
            print(msg)
        if dbcnxn is not None:
            try:
                print("Connect/Query Time = " + str(round((timer() - start), 3)) + " sec")
                dbcnxn.close()
            except:
                pass  # Handle case where connection wasn't established

    return result

In [52]:
print(date_to_julian('2026-12-01'))

26335


In [8]:
def _build_scrap_dataframe(raw_records: list) -> pd.DataFrame:
    """
    Build the scrap DataFrame from raw AS400 records.

    This function is responsible only for shaping, cleaning, and casting
    the data into the expected schema.
    """
    if not raw_records:
        # Return an empty DataFrame with the correct schema
        return pd.DataFrame(columns=SCRAP_COLUMNS)

    # Create dataframe with columns from SQL query (excluding DATE1)
    sql_columns = ["ITMID", "ITMDESC", "PRODCODE", "INVCLASS", "TXNCD", "REASON", "PLT", "TXNQTY", "UOM", "ITMCOST",
                   "FRZSTDCST", "SCRAP_COST", "TNREF1", "TXNSDT", "USERID"]
    df_scrap = pd.DataFrame.from_records(raw_records, columns=sql_columns)

    # Drop any rows missing required fields before numeric conversion
    df_scrap = df_scrap.dropna(subset=sql_columns)

    # Convert QTY to numeric, coercing invalid entries to NaN
    df_scrap["TXNQTY"] = pd.to_numeric(df_scrap["TXNQTY"], errors="coerce")

    # Remove rows where QTY could not be converted
    df_scrap = df_scrap.dropna(subset=["TXNQTY"])

    # Convert ITMCOST to numeric, coercing invalid entries to NaN
    df_scrap["ITMCOST"] = pd.to_numeric(df_scrap["ITMCOST"], errors="coerce")

    # Remove rows where ITMCOST could not be converted
    df_scrap = df_scrap.dropna(subset=["ITMCOST"])

    # Convert SCRAP_COST to numeric, coercing invalid entries to NaN
    df_scrap["SCRAP_COST"] = pd.to_numeric(df_scrap["SCRAP_COST"], errors="coerce")

    # Remove rows where SCRAP_COST could not be converted
    df_scrap = df_scrap.dropna(subset=["SCRAP_COST"])

    # Convert TXNSDT Julian date to yyyy-mm-dd format and add to new column DATE1
    df_scrap["DATE1"] = df_scrap["TXNSDT"].astype(int).apply(julian_to_date)

    try:
        df_scrap = df_scrap.astype(SCRAP_DTYPE)
    except (TypeError, ValueError) as exc:
        print(f"Error converting scrap data types: {exc}")
        return pd.DataFrame(columns=SCRAP_COLUMNS)

    # Let pandas choose the most appropriate dtypes (nullable ints, etc.)
    df_scrap = df_scrap.convert_dtypes()

    print(f"Processed {len(df_scrap)} scrap records")
    return df_scrap

In [9]:
def scrap_tbl(df_data):
    # Build Components Table
    print('Build Scrap SQL Table')
    if df_data.empty:
        print("Warning: Empty DataFrame, skipping SQL insert")
        return

    # Validate required columns exist
    required_columns = SCRAP_COLUMNS
    missing_columns = [col for col in required_columns if col not in df_data.columns]
    if missing_columns:
        error_msg = f"Missing required columns: {missing_columns}"
        print(f"Error: {error_msg}")
        raise ValueError(error_msg)

    data_type_dict = {
        "ITMID": sqlalchemy.types.VARCHAR(255),
        "ITMDESC": sqlalchemy.types.VARCHAR(255),
        "PRODCODE": sqlalchemy.types.VARCHAR(255),
        "INVCLASS": sqlalchemy.types.VARCHAR(255),
        "TXNCD": sqlalchemy.types.VARCHAR(255),
        "REASON": sqlalchemy.types.VARCHAR(255),
        "PLT": sqlalchemy.types.VARCHAR(255),
        "TXNQTY": sqlalchemy.types.FLOAT,
        "ITMCOST": sqlalchemy.types.FLOAT,
        "FRZSTDCST": sqlalchemy.types.FLOAT,
        "SCRAP_COST": sqlalchemy.types.FLOAT,
        "TNREF1": sqlalchemy.types.VARCHAR(255),
        "TXNSDT": sqlalchemy.types.FLOAT,
        "UOM": sqlalchemy.types.VARCHAR(255),
        "USERID": sqlalchemy.types.VARCHAR(255)
    }
    try:
        df_data.to_sql('tblScrapAll', engine, schema='eng', if_exists='replace', index=False,
                         dtype=data_type_dict)
        print(f"Successfully inserted {len(df_data)} records into tblScrapAll")
    except Exception as e:
        print(f"Error inserting data into tblScrapAll: {e}")

In [10]:

data_set = pull_data(CONNAS400_CCSDTA,sql_scrap)
df_scrap1 = _build_scrap_dataframe(data_set)
print(df_scrap1.info())
print(df_scrap1.dtypes)
scrap_tbl(df_scrap1)

5429 Records Processed From Table
Connect/Query Time = 0.481 sec
Processed 5429 scrap records
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5429 entries, 0 to 5428
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ITMID       5429 non-null   string 
 1   ITMDESC     5429 non-null   string 
 2   PRODCODE    5429 non-null   string 
 3   INVCLASS    5429 non-null   string 
 4   TXNCD       5429 non-null   string 
 5   REASON      5429 non-null   string 
 6   PLT         5429 non-null   string 
 7   TXNQTY      5429 non-null   Float64
 8   UOM         5429 non-null   string 
 9   ITMCOST     5429 non-null   Float64
 10  FRZSTDCST   5429 non-null   Float64
 11  SCRAP_COST  5429 non-null   Float64
 12  TNREF1      5429 non-null   string 
 13  TXNSDT      5429 non-null   Int64  
 14  USERID      5429 non-null   string 
 15  DATE1       5429 non-null   string 
dtypes: Float64(4), Int64(1), string(11)
memory usage: 705.3 

#### Build Dataframe for Component Inventory

In [40]:
def get_inv():
    try:
        dbcnxn1 = pyodbc.connect(CONNAS400_CCSDTA, timeout=30)
        cursor1 = dbcnxn1.cursor()
        start = timer()

        try:
            cursor1.execute(sql_inv)
            result = cursor1.fetchall()
            msg = str(len(result)) + " AS400 Inventory Records Processed From Table"
        except Exception as e:
            msg = 'AS400 Inventory Query Failed: ' + str(e)
            result = []
            print(msg)
        finally:
            print(msg)
            print("AS400 Connect/Query Time = " + str(round((timer() - start), 3)) + " sec")
            return result
    except pyodbc.Error as e:
        msg = 'AS400 Connection Failed: ' + str(e)
        result = []
        print(msg)
    finally:
        if 'dbcnxn1' in locals():
            dbcnxn1.close()

In [41]:
print(get_inv())

AS400 Inventory Query Failed: ('42S02', '[42S02] [IBM][System i Access ODBC Driver][DB2 for i5/OS]SQL0204 - DCSCIM in SMY type *FILE not found. (-204) (SQLExecDirectW)')
AS400 Inventory Query Failed: ('42S02', '[42S02] [IBM][System i Access ODBC Driver][DB2 for i5/OS]SQL0204 - DCSCIM in SMY type *FILE not found. (-204) (SQLExecDirectW)')
AS400 Connect/Query Time = 0.003 sec
[]


In [22]:
def get_emps():
    """Get Employees From iSeries AS400"""
    try:
        dbcnxn = pyodbc.connect(CONNAS400)
        cursor = dbcnxn.cursor()
    except pyodbc.Error as e:
        msg = 'AS400 Connection Failed: ' + str(e)
        print(msg)

    eng_login = ['9999', 'ELMER J FUDD', 'ENG']
    lead_login = ['1208', 'Wile E Coyote', 'ENG']
    eng = ['1208', '9107', '1656', '1472', '1626', '1351']

    strsql = """SELECT STRIP(EMP_CLOCK_NUMBER) As Clock,
                    CONCAT(CONCAT(STRIP(EMP_FIRST_NAME), ' '),
                    STRIP(EMP_LAST_NAME)) As Name,
                    STRIP(EMP_POSITION_CODE) As Code
            FROM PROD.FPCLCKPAY
            WHERE (EMP_LOCATION = 09) AND (EMP_LAST_NAME <> 'TEMP') AND (EMP_SHIFT_TYPE = 'A')
            ORDER BY EMP_CLOCK_NUMBER"""
    try:
        cursor.execute(strsql)
        result = cursor.fetchall()
    except Exception as e:
        msg = 'AS400 Employee Query Failed: ' + str(e)
        result = []
        print(msg)
    else:
        msg = str(len(result)) + " AS400 Employee Records Processed From Table"
        print(msg)
    result.append(eng_login)
    result.append(lead_login)
    for row in result:
        if row[0] in eng:
            row[2] = 'ENG'
    dbcnxn.close()
    return result

In [23]:
print(get_emps())


471 AS400 Employee Records Processed From Table
[('1001', 'DEBBIE POMEROY-GAMMONS', 'DEF'), ('1013', 'BETH THOMPSON', 'DEF'), ('1032', 'THOMAS BOGLE', 'DEF'), ('1039', 'VICKIE ANDERSON', 'DEF'), ('1090', 'BILLY NIPPERS', 'DEF'), ('1122', 'SHELLEY HOUSTON', 'DEF'), ('1139', 'ANTIONETTE BASS', 'DEF'), ('1175', 'WYOMIA CLARK', 'DEF'), ('1176', 'NALEE NHOTSAVANG', 'DEF'), ('1177', 'CHARLES TOOMBS', 'DEF'), ('1213', 'DOUGLAS SHIPP', 'DEF'), ('1238', 'MARICHU RIVERA', 'DEF'), ('1258', 'PHOUVEUNE LAMNGEUN', 'DEF'), ('1318', 'TERESA HOLLIS', 'DEF'), ('1331', 'NANCY CLARK', 'DEF'), ('1332', 'BRENDA CUNNINGHAM', 'DEF'), ('1347', 'HERBERT GRAHAM', 'DEF'), ('1352', 'DALE GERHART', 'DEF'), ('1372', 'SITHONH BOUAPHACHANH', 'DEF'), ('1429', 'GARY BELCHER', 'DEF'), ('1444', 'RILEY BREWER  JR.', 'DEF'), ('1464', 'CHRISTOPHER HIX', 'DEF'), ('1483', 'BARRY KIRBY', 'DEF'), ('1496', 'TERRY MARTIN', 'DEF'), ('1499', 'ACKHASONE PHILAVARN', 'DEF'), ('1514', 'KHAMIN VONGPANYA', 'DEF'), ('1515', 'PHILLIP STINSO