In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date

In [3]:
# Load the source parquet file into `df`; the path is relative to the notebook's
# working directory.
df = pd.read_parquet('../do_not_commit/Datasets/Persist_Operating_System_DATA.pq')

In [15]:
def _coerce_to_date(value):
    """Return ``value`` as a ``datetime.date``.

    Accepts a ``datetime`` (time part dropped), a ``date`` (returned as is) or a
    string in ``'%Y-%m-%d'`` format.
    """
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    return datetime.strptime(value, '%Y-%m-%d').date()


def calculate_age_in_days(target_date_str, in_date):
    """Return the number of whole days from ``target_date_str`` to ``in_date``.

    Parameters
    ----------
    target_date_str : str, datetime.date or datetime.datetime
        Start of the interval. Strings must be in ``'%Y-%m-%d'`` format.
    in_date : str, datetime.date or datetime.datetime
        End of the interval. Strings must be in ``'%Y-%m-%d'`` format.

    Returns
    -------
    int
        ``in_date - target_date_str`` in days; negative when ``in_date`` is the
        earlier of the two.

    Raises
    ------
    ValueError
        If a string argument does not match ``'%Y-%m-%d'``.
    """
    # Date objects are accepted as well as strings because callers in this
    # notebook pass a ``datetime.date`` as the reference date.
    return (_coerce_to_date(in_date) - _coerce_to_date(target_date_str)).days


def create_program_features(in_df, reference_date=date(2023, 7, 4)):
    """Add an ``InstallAge`` column: days from ``InstallDate00`` to a reference date.

    The frame is modified in place and also returned.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Must contain an ``InstallDate00`` column that ``pd.to_datetime`` can parse.
    reference_date : datetime.date, datetime.datetime or str, optional
        Date the age is measured against. Defaults to 2023-07-04, the value that
        was previously hard-coded in the function body.

    Returns
    -------
    pandas.DataFrame
        ``in_df`` with ``InstallDate00`` rewritten as ``'%Y-%m-%d'`` strings and a
        new ``InstallAge`` column (NaN where the install date is missing).
    """
    # Reduce the install timestamp to a day-level string; missing values stay NaN.
    in_df['InstallDate00'] = pd.to_datetime(in_df['InstallDate00']).dt.strftime('%Y-%m-%d')

    # Parse the day-level strings back so the age is computed from exactly the
    # values stored in the column (time of day already discarded).
    install_days = pd.to_datetime(in_df['InstallDate00'], format='%Y-%m-%d')
    reference_day = pd.Timestamp(reference_date).normalize()

    # Vectorised subtraction; rows with a missing install date yield NaN.
    in_df['InstallAge'] = (reference_day - install_days).dt.days
    return in_df


def create_last_boot_features(in_df):
    """Add a ``LastBootAge`` column: days from ``LastBootUpTime00`` to ``RWB_EFFECTIVE_DATE``.

    The frame is modified in place and also returned.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Must contain ``LastBootUpTime00`` (parseable by ``pd.to_datetime``) and
        ``RWB_EFFECTIVE_DATE`` (whose string form is ``'%Y-%m-%d'``).

    Returns
    -------
    pandas.DataFrame
        ``in_df`` with ``LastBootUpTime00`` rewritten as ``'%Y-%m-%d'`` strings,
        ``RWB_EFFECTIVE_DATE`` cast to ``str`` and a new ``LastBootAge`` column
        (NaN where the last boot time is missing).
    """
    # Reduce the boot timestamp to a day-level string; missing values stay NaN.
    in_df['LastBootUpTime00'] = pd.to_datetime(in_df['LastBootUpTime00']).dt.strftime('%Y-%m-%d')
    in_df['RWB_EFFECTIVE_DATE'] = in_df['RWB_EFFECTIVE_DATE'].astype(str)

    # Parse both day-level string columns once, instead of calling strptime per row.
    boot_days = pd.to_datetime(in_df['LastBootUpTime00'], format='%Y-%m-%d')
    effective_days = pd.to_datetime(in_df['RWB_EFFECTIVE_DATE'], format='%Y-%m-%d')

    # Column-wise subtraction also works on an empty frame, where a row-wise
    # DataFrame.apply would not return a Series.
    in_df['LastBootAge'] = (effective_days - boot_days).dt.days
    return in_df

In [None]:
# Collapse the raw rows to one row per (MachineID, RWB_EFFECTIVE_DATE) pair:
# latest install date, earliest last-boot time and largest virtual memory size.
features = (
    df.groupby(['MachineID', 'RWB_EFFECTIVE_DATE'])
    .agg(
        InstallDate00=('InstallDate00', 'max'),
        LastBootUpTime00=('LastBootUpTime00', 'min'),
        TotalVirtualMemorySize00=('TotalVirtualMemorySize00', 'max'),
    )
    .reset_index()
)
features

In [11]:
# Add the InstallAge feature. create_program_features modifies `features` in place
# and returns it, so from here on `df` is the aggregated feature frame rather than
# the raw data read from parquet.
df = create_program_features(features)

In [16]:
# Add the LastBootAge feature (days from LastBootUpTime00 to RWB_EFFECTIVE_DATE).
df = create_last_boot_features(df)

In [18]:
# Write the finished feature frame to parquet.
df.to_parquet('../do_not_commit/FeatureDatasets/operating_system_features.pq')