### 1. Import modules

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pyodbc
import pandas as pd
import numpy as np

### 2. Import data into a pandas DataFrame (Kidos_TOESTEMMINGEN)

In [2]:
# Load the pickled frame that supplies the 'Clientnummer' values used further down.
# Raw string: the backslashes are literal Windows path separators; without the r-prefix
# '\P' and '\z' are invalid escape sequences (SyntaxWarning on Python 3.12+).
# NOTE(review): unpickling executes arbitrary code - only load pickles from a trusted source.
df_zuigelingen = pd.read_pickle(r'4_Data\Pickles\zuigelingen_observaties.pkl')

In [3]:
# Set to True to query the database again; False loads the previously saved pickle
NEWIMPORT = False

# Single definition of the cache location, shared by the save and the load branch.
# Raw string: the backslashes are literal path separators, not escape sequences.
toestemmingen_og_pickle = r'4_Data\Pickles\toestemmingen_zuigelingen_og.pkl'

if NEWIMPORT:
    # Replace server name if source changes (raw string keeps the backslash literal)
    server = r'AZ-ODB0\ODBWB'
    database = 'OnderzoeksDB_WB'

    # Create a connection string
    connection_string = f'DRIVER={{SQL Server}};SERVER={server};DATABASE={database}'

    # Define columns to select
    selected_columns = ['FK_CLIENT_ID',
                        'TOESTEMMINGSVRAAG',
                        'ANTWOORD',
                        'BEGINDATUM']

    # Define the Clientnummers to include
    included_clientnummers = df_zuigelingen['Clientnummer'].unique()
    print(len(included_clientnummers))

    # Build the SQL text. str() is applied to every ID so the join also works when
    # 'Clientnummer' holds numbers instead of strings.
    # NOTE(review): the IDs are interpolated directly into the SQL text; this is only
    # acceptable because they come from the project's own data - confirm that holds.
    column_list = ', '.join(selected_columns)
    id_list = ', '.join(map(str, included_clientnummers))
    query = f'SELECT {column_list} FROM Kidos_TOESTEMMINGEN WHERE FK_CLIENT_ID IN ({id_list})'

    # Open the connection only for the duration of the query and always close it,
    # even when the query raises.
    connection = pyodbc.connect(connection_string)
    try:
        # Execute the query and fetch the data into a pandas DataFrame
        df_toestemmingen_og = pd.read_sql(query, connection)
    finally:
        connection.close()

    # Save df_toestemmingen_og as a pickle
    df_toestemmingen_og.to_pickle(toestemmingen_og_pickle)
else:
    # Load the pickle written by an earlier run with NEWIMPORT = True
    df_toestemmingen_og = pd.read_pickle(toestemmingen_og_pickle)

### 3. Process

In [4]:
# De-duplicate the raw import into a new frame (first occurrence of each identical row is
# kept); df_toestemmingen_og itself is not modified, so this cell can be re-run safely.
df_toestemmingen = df_toestemmingen_og.drop_duplicates(keep='first')

In [5]:
# Report the number of rows and the number of distinct client IDs after de-duplication.
# NOTE(review): the message text says 'df_contactmomenten' while the frame being counted
# is df_toestemmingen - confirm whether the wording should be updated.
n_rows_all = len(df_toestemmingen)
n_clients_all = len(df_toestemmingen['FK_CLIENT_ID'].unique())
print(f"The amount of relations in the df_contactmomenten is: {n_rows_all}, the amount of unique client ID's is: {n_clients_all}")

The amount of relations in the df_contactmomenten is: 26238, the amount of unique client ID's is: 8295


In [6]:
# Keep only the rows whose TOESTEMMINGSVRAAG is 'Toestemming gegevensuitwisseling RVP'
is_rvp_vraag = df_toestemmingen['TOESTEMMINGSVRAAG'] == 'Toestemming gegevensuitwisseling RVP'
toestemmingen_rvp = df_toestemmingen[is_rvp_vraag]

# Per client, the index label of the row with the largest (most recent) BEGINDATUM.
# NOTE(review): assumes every client has at least one non-missing BEGINDATUM - TODO confirm.
zuigelingen_toestemmingen_rvp = toestemmingen_rvp.groupby('FK_CLIENT_ID')['BEGINDATUM'].idxmax()

# Look those index labels up again to get one full row per client
most_recent = toestemmingen_rvp.loc[zuigelingen_toestemmingen_rvp.values]

# Build the final two-column frame in one chain. .assign returns a new frame, so no value
# is written into a slice of another frame (the original column assignment was a chained
# assignment whose SettingWithCopyWarning was hidden by the global warnings filter).
zuigelingen_toestemmingen = (
    most_recent[['FK_CLIENT_ID', 'ANTWOORD']]
    # 'wel toestemming' -> 1; every other value (including 'geen toestemming') -> 0
    .assign(ANTWOORD=lambda frame: np.where(frame['ANTWOORD'] == 'wel toestemming', 1, 0))
    .rename(columns={'FK_CLIENT_ID': 'Clientnummer', 'ANTWOORD': 'Toestemming'})
)

In [7]:
# Report the number of rows and the number of distinct clients in the final frame.
# NOTE(review): the message text says 'df_contactmomenten' while the frame being counted
# is zuigelingen_toestemmingen - confirm whether the wording should be updated.
n_rows_rvp = len(zuigelingen_toestemmingen)
n_clients_rvp = len(zuigelingen_toestemmingen['Clientnummer'].unique())
print(f"The amount of relations in the df_contactmomenten is: {n_rows_rvp}, the amount of unique client ID's is: {n_clients_rvp}")

The amount of relations in the df_contactmomenten is: 7683, the amount of unique client ID's is: 7683


### 4. Save as .pkl

In [8]:
# Save zuigelingen_toestemmingen as a pickle.
# Raw string: the backslashes are literal Windows path separators; without the r-prefix
# '\P' and '\z' are invalid escape sequences (SyntaxWarning on Python 3.12+).
zuigelingen_toestemmingen.to_pickle(r'4_Data\Pickles\zuigelingen_toestemmingen.pkl')