# Python script for data transformation 

## BRICARE:

BRICARE consists of 2 different types of files by year:

a. Files after 2022 (2023-2024) = 79 columns


b. Files up to and including 2022 (2019-2022) = 27 columns


### File Type A


Data extraction for File Type A must produce 2 files:


A.1 Columns (without "Details")


A.2 Details only 

Columns to be cleansed or transformed:
- All columns containing the placeholder values "None", "NaN", "N/A", "NULL"
- These columns must follow this datetime format: format='%Y-%m-%d %H:%M:%S' or format='%Y-%m-%d %H:%M:%S.%f'

['Create_Date','TanggalClosed', 'tanggalTransaksi','Modified_Date','tanggalAttachmentDone','Tgl_Assigned','Tgl_Eskalasi','Tanggal_Settlement','Tgl_Foward','Tgl_In_Progress','Tgl_Returned']

- Remove all unknown characters e.g. \ufeff in column "Ticket_ID"

- Columns should be mapped based on their Call_Type_ID:

['Produk','Jenis_Produk','Jenis_Laporan']

#### File A.1 Columns (without "Details")

In [27]:
import pandas as pd
import re
import numpy as np

# The 78 columns of file A.1, in file order ("Details" is extracted
# separately as file A.2). The spellings are the output column names and
# must not be "corrected".
column_names = [
    "Ticket_ID", "Call_Type_ID", "Call_Type", "Create_Date", "gateway", "Jenis_Laporan", "Nama_Nasabah", 
    "No_Rekening", "Nominal", "status", "TanggalClosed", "tanggalTransaksi", "Chanel", "Fitur", "Nomor_Kartu", 
    "user_group", "assgined_to", "attachment_done", "email", "full_name", "no_telepon", "approver_login", 
    "approver_name", "SLAResolution", "submitter_login_id", "submitter_user_group", "user_login_name", 
    "Jenis_Produk", "Last_Modified_By", "Merchant_ID", "Modified_Date", "NOTAS", "Produk", "SLA_Status", "TID", 
    "tanggalAttachmentDone", "Tgl_Assigned", "Tgl_Eskalasi", "AnalisaSkils", "Attachment_", "Bank_BRI", 
    "Biaya_Admin", "Suku_Bunga", "Bunga", "Butuh_Attachment", "Cicilan", "Hasil_Kunjungan", "Log_Name", 
    "MMS_Ticket_Id", "Mass_Ticket_Upload_Flag", "Nama_Supervisor", "Nama_TL", "Nama_Wakabag", "Nasabah_Prioritas", 
    "Notify_By", "Organization", "Output_Settlement", "phone_survey", "Return_Ticket", "Settlement_By", 
    "Settlement_ID", "Settlement", "Site_User", "Status_Return", "Status_Transaksi", "Submitter_Region", 
    "Submitter_SiteGroup", "Submitter_User_group_ID", "Tanggal_Settlement", "Tgl_Foward", "Tgl_In_Progress", 
    "Tgl_Returned", "Ticket_Referensi", "Tiket_Urgency", "Tipe_Remark", "UniqueID", "users", "Usergroup_ID"
]


def parse_file(file_path):
    """Parse a semicolon-delimited BRICARE type-A export into a DataFrame.

    Each non-blank line is split on ';'. The first field is the ticket id,
    the second the call type id. Every field between the call type id and
    the first field that starts with a ``YYYY-MM-DD HH:MM:SS.mmm`` timestamp
    is re-joined with ';' into a single "Call_Type" value, because that
    free-text field may itself contain semicolons. The timestamp becomes
    "Create_Date" and the remaining fields fill the rest of ``column_names``.

    Args:
        file_path: Path of the text export. It is read as UTF-8; a leading
            byte-order mark is discarded by the ``utf-8-sig`` codec.

    Returns:
        A DataFrame with the columns of ``column_names``. "Create_Date" is
        converted to datetime (unparseable values become NaT); every other
        column holds the raw strings from the file.

    Raises:
        ValueError: If a non-blank line contains no timestamp field, or if
            the parsed rows do not fit ``column_names`` (raised by pandas).
    """
    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}')
    data = []

    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record; previously they aborted the whole parse.
                continue

            parts = stripped.split(';')

            # Locate the Create_Date field; it anchors the split between the
            # free-text call type description and the fixed trailing fields.
            date_index = next(
                (i for i, part in enumerate(parts) if date_pattern.match(part)),
                None,
            )
            if date_index is None:
                raise ValueError(
                    f"{file_path}: line {line_number} has no "
                    "'YYYY-MM-DD HH:MM:SS.mmm' timestamp field"
                )

            ticket_id = parts[0]
            call_type_id = parts[1]
            description = ';'.join(parts[2:date_index])
            create_date = parts[date_index]

            data.append(
                [ticket_id, call_type_id, description, create_date]
                + parts[date_index + 1:]
            )

    df = pd.DataFrame(data, columns=column_names)

    df['Create_Date'] = pd.to_datetime(
        df['Create_Date'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f'
    )

    return df


# --- Load file A.1, cleanse it and export it as CSV --------------------------
file_path = r"C:\Users\maste\Downloads\bricare_case_januari2023_1masking.txt"

df = parse_file(file_path)

# Textual placeholders for "no value" are blanked out. "NaN" is included
# because the cleansing rules for this file list it next to the others.
df.replace(['NULL', 'None', 'N/A', 'NaN'], np.nan, inplace=True)
df.fillna('', inplace=True)
# NOTE(review): this also blanks genuine zero values (e.g. a zero amount);
# kept as in the original pipeline - confirm this is intended.
df = df.replace(['0', 0], '')


columns_to_convert = ['TanggalClosed', 'tanggalTransaksi','Modified_Date','tanggalAttachmentDone','Tgl_Assigned','Tgl_Eskalasi','Tanggal_Settlement','Tgl_Foward','Tgl_In_Progress','Tgl_Returned']
for column in columns_to_convert:
    # Both '%Y-%m-%d %H:%M:%S' and '%Y-%m-%d %H:%M:%S.%f' are valid inputs.
    # Parsing with only the first format coerced every fractional-second
    # timestamp to NaT, i.e. silently blanked it, so each format is tried
    # and the first successful parse is kept.
    whole_seconds = pd.to_datetime(df[column], format='%Y-%m-%d %H:%M:%S', errors='coerce')
    fractional_seconds = pd.to_datetime(df[column], format='%Y-%m-%d %H:%M:%S.%f', errors='coerce')
    # Combine at object level so differing datetime resolutions cannot clash.
    parsed = whole_seconds.astype(object).where(
        whole_seconds.notna(), fractional_seconds.astype(object)
    )

    # Unparseable or missing dates are exported as empty cells, not "NaT".
    df[column] = parsed.apply(lambda x: '' if pd.isna(x) else x)


# Remove stray byte-order marks and surrounding whitespace from the ticket id.
df['Ticket_ID'] = df['Ticket_ID'].apply(lambda x: x.replace('\ufeff', '').strip())
save_path=r"D:\dataquality\bricare_20230101_20230101.csv"
df.to_csv(save_path,index=False)

  df.replace('NULL', np.nan, inplace=True)
  df.replace('None', np.nan, inplace=True)
  df.fillna('', inplace=True)


#### Cleansing the master Call Type file

In [34]:
import pandas as pd

# --- Cleanse the master call-type file and export it as CSV ------------------
master_df_path = r"C:\Users\maste\Downloads\bricare\(REVISED) SLA-OLA_NewUserGrouping_Ringkasan Kirim ME Versi 1.6.csv"
df = pd.read_csv(master_df_path, sep=';')


# Turn textual "no value" placeholders into real missing values first ...
df = df.replace(['NULL', 'None', 'N/A'], np.nan)
# ... so that completely empty rows can be dropped. This must run BEFORE
# fillna(''): once the gaps are filled with '' no row counts as all-missing
# any more and dropna(how='all') would never remove anything.
df = df.dropna(how='all')
df = df.fillna('')
# NOTE(review): this also blanks genuine zero values; kept as in the
# original pipeline - confirm this is intended.
df = df.replace(['0', 0], '')
# NOTE(review): the result of this slice is discarded, so it has no effect.
# If the intent is to keep only the first 450 rows, assign it back to df.
df.iloc[:450]
# NOTE(review): written relative to the current working directory, while the
# mapping step reads r"D:\dataquality\master_calltype.csv" - presumably the
# notebook runs from D:\dataquality; verify.
df.to_csv("master_calltype.csv", index=False)

  df.fillna('', inplace=True)


#### Call type mapping for columns 'Produk', 'Jenis Produk', 'Jenis Laporan'

In [38]:
import pandas as pd


# --- Map 'Produk', 'Jenis_Produk', 'Jenis_Laporan' from the master file ------
user_dataset_path = r"D:\dataquality\bricare_20230101_20230101.csv"
master_df_path = r"D:\dataquality\master_calltype.csv"

# Read every column as text and keep empty cells as ''. Letting pandas infer
# types would rewrite values on the round trip back to CSV (leading zeros of
# account / phone numbers dropped, ids with gaps turned into floats such as
# "8000.0"), and would make the Call_Type_ID comparison depend on the
# inferred dtype of each file.
user_df = pd.read_csv(user_dataset_path, dtype=str, keep_default_na=False)
master_df = pd.read_csv(master_df_path, dtype=str, keep_default_na=False)

# Ensure column names are consistent and clean in the master file
master_df = master_df.rename(columns={
    'Case Types': 'Call_Type_ID', 
    'Product': 'Produk', 
    'Sub Product': 'Jenis_Produk', 
    'Case Category': 'Jenis_Laporan'
})

mapped_columns = ['Produk', 'Jenis_Produk', 'Jenis_Laporan']


def _normalize_call_type_id(ids):
    """Return ids as stripped text, without a float-style '.0' suffix."""
    return ids.astype(str).str.strip().str.replace(r'\.0+$', '', regex=True)


user_keys = _normalize_call_type_id(user_df['Call_Type_ID'])
master_df['Call_Type_ID'] = _normalize_call_type_id(master_df['Call_Type_ID'])

# One row per Call_Type_ID. A left merge against a master with repeated ids
# yields more rows than user_df, and assigning its columns back by index then
# shifts values onto the wrong tickets. Rows without an id are ignored so
# that tickets with an empty id are not matched to them.
lookup = (
    master_df.loc[master_df['Call_Type_ID'] != '', ['Call_Type_ID'] + mapped_columns]
    .drop_duplicates(subset='Call_Type_ID', keep='first')
    .set_index('Call_Type_ID')
)

# Overwrite the three columns with the master values; as before, a ticket
# whose Call_Type_ID is not in the master ends up with an empty value.
for column in mapped_columns:
    user_df[column] = user_keys.map(lookup[column])

# Save the updated dataset in place
user_df.to_csv(user_dataset_path, index=False)

#### File A.2 Details only

In [3]:
import pandas as pd

def process_text_data(file_path):
    """Group the lines of a "Details" export into one entry per ticket.

    A record starts on a line that begins with 'TTB' and has at least four
    comma-separated fields: the first field is the ticket id and everything
    after the third comma is the first line of the details text. All
    following lines, up to the next record start, are continuation lines of
    that text.

    A line that begins with 'TTB' but has fewer than four fields is treated
    as continuation text. Previously such a line was dropped and split the
    record in two.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A list of ``(ticket_id, details)`` tuples in file order. ``details``
        is the stripped lines of the record joined with newlines. Lines that
        appear before the first record start are returned with a ticket id
        of ``None``.
    """
    entries = []
    current_entry = []
    current_ticket_id = None

    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            # Byte-order marks can appear on any line, not only the first.
            line = raw_line.replace('\ufeff', '')

            parts = line.split(',', 3) if line.startswith('TTB') else []
            if len(parts) > 3:
                # New record: flush the one collected so far, then start over.
                if current_entry:
                    entries.append((current_ticket_id, '\n'.join(current_entry)))
                current_ticket_id = parts[0]
                current_entry = [parts[3].strip()]
            else:
                current_entry.append(line.strip())

    if current_entry:
        entries.append((current_ticket_id, '\n'.join(current_entry)))

    return entries


def remove_bom_and_strip(df):
    """Return a copy of *df* with every string cell cleaned.

    For each cell that is a ``str``, byte-order marks ('\\ufeff') are removed
    and surrounding whitespace is stripped. Cells of any other type are
    returned unchanged. The input frame is not modified.
    """
    def _clean(value):
        if isinstance(value, str):
            return value.replace('\ufeff', '').strip()
        return value

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. Check the class, not the instance, so a column that
    # happens to be called "map" cannot be mistaken for the method.
    if hasattr(pd.DataFrame, 'map'):
        return df.map(_clean)
    return df.applymap(_clean)


# --- Build the "Details" table (file A.2) and export it ----------------------
file_path = r"C:\Users\maste\Downloads\bricare_case_januari2023_2_details.txt"
processed_data = process_text_data(file_path)


df_final = pd.DataFrame(processed_data, columns=['Ticket ID', 'Details'])

# If the first record's details text starts with its own ticket id, cut off
# the id plus the two characters that follow it. Guarded so that an empty
# input file no longer fails with IndexError on row 0.
if not df_final.empty:
    first_ticket_id = df_final.at[0, 'Ticket ID']
    first_details = df_final.at[0, 'Details']
    if first_ticket_id and first_details.startswith(first_ticket_id):
        df_final.at[0, 'Details'] = first_details[len(first_ticket_id)+2:]

# NOTE(review): only the first 10 rows are written to details.csv -
# presumably a sample export; remove the slice to export every ticket.
df_final.iloc[:10].to_csv('details.csv', index=False)
df_final

Unnamed: 0,Ticket ID,Details
0,TTB000043833835,Nasabah mengajukan pemblokiran kartu ATM BRI\n...
1,TTB000043833951,#BRILINKMOB\n\nDATA outlet BRILINK\nKode Outle...
2,TTB000043833734,"#CALL TERPUTUS\n\nif ch call back ,layanan IB ..."
3,TTB000043833965,Nasabah gagal melakukan transaksi tarik tunai ...
4,TTB000043833833,"ch infokan melakukan registrasi brimo, namun m..."
...,...,...
364802,TTB000044335239,"Saldo Berkurang,Nasabah gagal melakukan transa..."
364803,TTB000044335249,Nasabah gagal melakukan transaksi tarik tunai ...
364804,TTB000044335135,Nasabah melaporkan belum terima kartu perpanja...
364805,TTB000044335255,"Saldo Berkurang,Nasabah gagal melakukan transa..."
