## Detail Column of Data 2023 for BRICARE

In [1]:
import pandas as pd

def process_text_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    entries = []
    current_entry = []
    current_ticket_id = None

    for line in lines:
        if line.startswith('TTB'):
            if current_entry:  # If there's collected content, append it with the Ticket ID
                entries.append((current_ticket_id, '\n'.join(current_entry)))
                current_entry = []
            # Extract Ticket ID and the part after the third comma
            parts = line.split(',', 3)
            if len(parts) > 3:
                current_ticket_id = parts[0]  # Store the Ticket ID
                current_entry.append(parts[3].strip())  # Start collecting the entry content
            continue
        current_entry.append(line.strip())

    # Add the last collected entry if any
    if current_entry:
        entries.append((current_ticket_id, '\n'.join(current_entry)))

    return entries

# Path to your text file
file_path = r"C:\Users\maste\Downloads\bricare_case_januari2023_2_details.txt"
processed_data = process_text_data(file_path)

# Create a DataFrame from the processed data
df_final = pd.DataFrame(processed_data, columns=['Ticket ID', 'Content'])

# Adjust the first entry in the Content column if necessary
if df_final.iloc[0]['Ticket ID'] and df_final.iloc[0]['Content'].startswith(df_final.iloc[0]['Ticket ID']):
    # Remove the Ticket ID and any two characters before it from the content
    df_final.at[0, 'Content'] = df_final.iloc[0]['Content'][len(df_final.iloc[0]['Ticket ID'])+2:]

# Save the DataFrame to a CSV file
# df_final.to_csv('exp_5.csv', index_label='Index')

df_final.head()  # Display the first few rows to check the output


Unnamed: 0,Ticket ID,Content
0,,"ï»¿TTB000043833835,8701,Blokir Kartu ATM karena ..."
1,TTB000043833951,#BRILINKMOB\n\nDATA outlet BRILINK\nKode Outle...
2,TTB000043833734,"#CALL TERPUTUS\n\nif ch call back ,layanan IB ..."
3,TTB000043833965,Nasabah gagal melakukan transaksi tarik tunai ...
4,TTB000043833833,"ch infokan melakukan registrasi brimo, namun m..."


## Modify the first line

In [48]:
import pandas as pd

# Load the dataset
data = pd.read_csv('D:\dataquality\exp_5.csv', encoding='utf-8-sig')  # Handles any encoding issues like BOM

# Initialize a list to store processed data
processed_data = []

# Process the first line
first_line = data.loc[0, 'Content']
ticket_id_first_line = first_line.split(',')[0]
content_after_third_comma_first_line = ','.join(first_line.split(',')[3:]) if len(first_line.split(',')) > 3 else None
processed_data.append({'Ticket ID': ticket_id_first_line, 'Content': content_after_third_comma_first_line})

# Process the rest of the lines normally
for index, row in data.iterrows():
    if index == 0:  # Skip the first line as it is already processed
        continue
    if pd.notna(row['Ticket ID']) and row['Ticket ID'].startswith('TTB'):
        content_parts = row['Content'].split(',')
        processed_content = ','.join(content_parts[3:]) if len(content_parts) > 3 else row['Content']
    else:
        processed_content = row['Content']  # Keep the original content if not starting with TTB
    processed_data.append({'Ticket ID': row['Ticket ID'], 'Content': processed_content})

# Create a DataFrame from the processed data
result_df = pd.DataFrame(processed_data)



result_df

# result_df.to_csv('exp_6.csv', index=False)

Unnamed: 0,Ticket ID,Content
0,ï»¿TTB000043833835,Nasabah mengajukan pemblokiran kartu ATM BRI\n...
1,TTB000043833951,#BRILINKMOB\n\nDATA outlet BRILINK\nKode Outle...
2,TTB000043833734,", nomor tt\n\n\n\nNasabah mengajukan pemblok..."
3,TTB000043833965,Nasabah gagal melakukan transaksi tarik tunai ...
4,TTB000043833833,"ch infokan melakukan registrasi brimo, namun m..."
...,...,...
364802,TTB000044335239,"Saldo Berkurang,Nasabah gagal melakukan transa..."
364803,TTB000044335249,Nasabah gagal melakukan transaksi tarik tunai ...
364804,TTB000044335135,"\nADDR 2, ,JL RAYA MERDEKA NO 110 ,\nAD..."
364805,TTB000044335255,"Saldo Berkurang,Nasabah gagal melakukan transa..."


## Data 2023 and after 

In [47]:
import pandas as pd
import re
import numpy as np

# 78 Columns
column_names = [
    "Ticket_ID", "Call_Type_ID", "Call_Type", "Create_Date", "gateway", "Jenis_Laporan", "Nama_Nasabah", 
    "No_Rekening", "Nominal", "status", "TanggalClosed", "tanggalTransaksi", "Chanel", "Fitur", "Nomor_Kartu", 
    "user_group", "assgined_to", "attachment_done", "email", "full_name", "no_telepon", "approver_login", 
    "approver_name", "SLAResolution", "submitter_login_id", "submitter_user_group", "user_login_name", 
    "Jenis_Produk", "Last_Modified_By", "Merchant_ID", "Modified_Date", "NOTAS", "Produk", "SLA_Status", "TID", 
    "tanggalAttachmentDone", "Tgl_Assigned", "Tgl_Eskalasi", "AnalisaSkils", "Attachment_", "Bank_BRI", 
    "Biaya_Admin", "Suku_Bunga", "Bunga", "Butuh_Attachment", "Cicilan", "Hasil_Kunjungan", "Log_Name", 
    "MMS_Ticket_Id", "Mass_Ticket_Upload_Flag", "Nama_Supervisor", "Nama_TL", "Nama_Wakabag", "Nasabah_Prioritas", 
    "Notify_By", "Organization", "Output_Settlement", "phone_survey", "Return_Ticket", "Settlement_By", 
    "Settlement_ID", "Settlement", "Site_User", "Status_Return", "Status_Transaksi", "Submitter_Region", 
    "Submitter_SiteGroup", "Submitter_User_group_ID", "Tanggal_Settlement", "Tgl_Foward", "Tgl_In_Progress", 
    "Tgl_Returned", "Ticket_Referensi", "Tiket_Urgency", "Tipe_Remark", "UniqueID", "users", "Usergroup_ID"
]

def parse_file(file_path):
    # Initialize a list to hold the parsed data
    data = []
    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}')

    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            parts = line.strip().split(';')
            # Find the index of the first date (fourth column in your description)
            date_index = next(i for i, part in enumerate(parts) if date_pattern.match(part))

            # Extract columns
            ticket_id = parts[0] 
            call_type_id = parts[1]  
            description = ';'.join(parts[2:date_index])  # Third column, concatenating all parts up to the date
            create_date = parts[date_index]  # Fourth column, the first date found

      
            data.append([ticket_id, call_type_id, description, create_date] + parts[date_index + 1:])


    df = pd.DataFrame(data, columns=column_names)

    # Date
    df['Create_Date'] = pd.to_datetime(df['Create_Date'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')

    return df


file_path = r"C:\Users\maste\Downloads\bricare_case_januari2023_1masking.txt"
df = parse_file(file_path)

# Replace 'NULL', 'N/A' and None with NaN
df.replace('NULL', np.nan, inplace=True)
df.replace('None', np.nan, inplace=True)
df.replace('N/A', np.nan, inplace=True)
df.fillna('', inplace=True)
df = df.replace(['0', 0], '')

#Convert the cols into Datetime
columns_to_convert = ['TanggalClosed', 'tanggalTransaksi','Modified_Date','tanggalAttachmentDone','Tgl_Assigned','Tgl_Eskalasi','Tanggal_Settlement','Tgl_Foward','Tgl_In_Progress','Tgl_Returned']
for column in columns_to_convert:
    df[column] = pd.to_datetime(df[column], format='%Y-%m-%d %H:%M:%S.%f', errors='coerce')
     # Replace NaT with an empty string
    df[column] = df[column].apply(lambda x: '' if pd.isna(x) else x)
   


# Picklist mapping
mapping_sets = [
    {'Yes': 'TRUE', 'No': 'FALSE'},
    {'Simpanan': 'Savings', 'Pinjaman': 'Loans'}
]
mapping = {}
for mapping_set in mapping_sets:
    mapping.update(mapping_set)

# Function to apply mapping
def apply_mapping(value):
    return mapping.get(value, value)

df = df.applymap(apply_mapping)
pd.set_option('display.max_columns', None)

df['Jenis_Laporan'].unique()
# Display
# df=df.iloc[:1]
# df.iloc[:1].to_csv("oneline.csv",index=False)


  df.replace('NULL', np.nan, inplace=True)
  df.replace('None', np.nan, inplace=True)
  df.fillna('', inplace=True)
  df = df.applymap(apply_mapping)


array(['Maintenance', 'Information', 'Complain'], dtype=object)

## Data before 2023

In [40]:
import pandas as pd


column_list = [
    "Ticket_ID",  
    "Call_Type_ID",  
    "Call_Type", 
    "Create_Date",  
    "gateway",  
    "Jenis_Laporan",  
    "Nama_Nasabah",  
    "No_Rekening", 
    "Nominal",  
    "status",  
    "TanggalClosed", 
    "tanggalTransaksi",  
    "Chanel",  
    "Fitur",  
    "Nomor_Kartu", 
    "user_group",  
    "assgined_to",  
    "attachment_done",  
    "email",  
    "full_name",  
    "no_telepon",  
    "approver_login",  
    "approver_name",  
    "SLAResolution",  
    "submitter_login_id",  
    "submitter_user_group", 
    "user_login_name"  
]

path=r"C:\Users\maste\Downloads\BRICARE_25042024 masking.csv"
data=pd.read_csv(path, delimiter=';')





# Convert Column1 to string if not already, and apply the regex filter for "TTB" followed by numbers
data['Column1'] = data['Column1'].astype(str)
data_cleaned = data[data['Column1'].str.match(r'TTB\d+')]

# Ensure Column2 is an integer type and contains exactly four digits
data_cleaned['Column2'] = data_cleaned['Column2'].astype(str)
data_cleaned = data_cleaned[data_cleaned['Column2'].str.match(r'^\d{4}$')]

# Ensure all entries in Column4 can be converted to datetime and filter out those that can't
data_cleaned['Column4'] = pd.to_datetime(data_cleaned['Column4'], errors='coerce')
data_cleaned = data_cleaned.dropna(subset=['Column4'])

# Drop Column 28-32
data_to_drop=['Column28','Column29','Column30','Column31','Column32']
data_cleaned=data_cleaned.drop(columns=data_to_drop)

# Display the cleaned data again
# data_to_show=['Column28','Column29','Column30','Column31','Column32']
# data_cleaned=data_cleaned[data_to_show]

# Check all column containing NaN
# data_cleaned=data_cleaned.dropna()
# data_cleaned

# data_cleaned.columns = column_list[:len(data_cleaned.columns)]
    
if len(data_cleaned.columns) <= len(column_list):
    data_cleaned.columns = column_list[:len(data_cleaned.columns)]

# Replace 'NULL', 'N/A' and None with NaN
data_cleaned.replace('NULL', np.nan, inplace=True)
data_cleaned.replace('None', np.nan, inplace=True)
data_cleaned.replace('N/A', np.nan, inplace=True)
data_cleaned.fillna('', inplace=True)
data_cleaned = data_cleaned.replace(['0', 0], '')

#Convert the cols into Datetime
columns_to_convert = ['TanggalClosed', 'tanggalTransaksi','Create_Date']
for column in columns_to_convert:
    data_cleaned[column] = pd.to_datetime(data_cleaned[column], format='%Y-%m-%d %H:%M:%S.%f', errors='coerce')
     # Replace NaT with an empty string
    data_cleaned[column] = data_cleaned[column].apply(lambda x: '' if pd.isna(x) else x)


data_cleaned



# df=df.iloc[:6]
# df.to_csv("not_cleanfordate.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['Column2'] = data_cleaned['Column2'].astype(str)


Unnamed: 0,Ticket_ID,Call_Type_ID,Call_Type,Create_Date,gateway,Jenis_Laporan,Nama_Nasabah,No_Rekening,Nominal,status,TanggalClosed,tanggalTransaksi,Chanel,Fitur,Nomor_Kartu,user_group,assgined_to,attachment_done,email,full_name,no_telepon,approver_login,approver_name,SLAResolution,submitter_login_id,submitter_user_group,user_login_name
0,TTB000026204763,8425,Pen-delete-an Status Registrasi Layanan yang A...,2020-01-01 07:19:37,Phone,Maintenance,Arif Budi Saputra,021234567890123,0.00,Closed,2020-01-01 07:19:43,,UKO,e-channel,5221841189816746,LCC-CRC,,,,Really Artha Ully Manik,081234567890,,,20,90136590,,Really Artha Ully Manik
1,TTB000026204728,8405,Kartu ATM BRI Tertelan di MESIN ATM,2020-01-01 07:19:30,Phone,Information,Arif Budi Saputra,021234567890123,0.00,Closed,2020-01-01 07:17:02,2020-01-01 00:00:00,ATM BRI,Kartu tertelan,5221842126912762,LCC-CCTCALL,,,,DELLA LARASSARI,081234567890,90022934,Adhi Nitidharma,20,90135196,,DELLA LARASSARI
2,TTB000026204747,8202,Informasi Product Banking,2020-01-01 07:19:27,Phone,Information,Arif Budi Saputra,021234567890123,741700.00,Closed,2020-05-20 08:50:01,2020-01-01 00:00:00,,,5221842112540379,LCC-CCTCALL,90140806,,,Kartika Fitriani,081234567890,,,20,90141079,LCC-CCTCALL,Kartika Fitriani
4,TTB000026204659,8812,Nasabah BRI gagal tarik tunai & terdebet di AT...,2020-01-01 07:00:34,Phone,Information,Arif Budi Saputra,021234567890123,1000000.00,Closed,2020-01-01 07:00:29,2020-01-01 00:00:00,ATM BRI,Tarik Tunai,6013011000558717,LCC-ON US,,,,AMELIA RAHMADANI,081234567890,00000723,Ismail,10,60443,LCC-CCTCALL,AMELIA RAHMADANI
5,TTB000026204577,8202,Informasi Product Banking,2020-01-01 07:09:29,Phone,Information,Arif Budi Saputra,021234567890123,500000.00,Closed,2020-05-20 08:47:29,2020-01-01 00:00:00,,,5326595007099497,LCC-CCTCALL,90140806,,bacuelkueh@gmail.com,Amalia Fitriana,081234567890,,,20,60422,LCC-CCTCALL,Amalia Fitriana
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375778,TTB000026715685,8411,Salah Transfer antar BRI,2020-01-31 19:32:26,Phone,Complain,HERIWATI,546501003139532,100001.00,Closed,2022-11-18 13:02:14,2020-01-29 00:00:00,ATM BRI,Transfer,6013013017499349,05545 -- UNIT MUARA LABUH SOLOK,90110471,,,ADE SUTISNA,082387480456,,,20,90135194,LCC-CCTCALL,ADE SUTISNA
375779,TTB000026713843,8411,Salah Transfer antar BRI,2020-01-31 17:28:34,Phone,Complain,MUTIAH,609401002283508,1950000.00,Closed,2023-03-01 18:01:47,2020-01-31 00:00:00,ATM BRI,Transfer,5221842102972780,00206 -- Jkt KCK,90110471,,,Umar Fahruddin Pratama,082136107896,,,20,90135689,,Umar Fahruddin Pratama
375780,TTB000026714586,7700,Komplain Transaksi Kartu Kredit tidak di akui,2020-01-31 18:24:32,Phone,Complain,MULIYA HARDIYANTO,,0.00,Closed,2023-03-08 10:08:40,,,Sanggahan,5188289230916308,ADMIN QSC,,,,Cintia Fadila,05264513380,,,67,90123773,LCC-CCTCALL,Cintia Fadila
375782,TTB000026713292,8411,Salah Transfer antar BRI,2020-01-31 16:56:02,Phone,Complain,S I M O N,064201002986507,9650000.00,Closed,2022-10-13 16:08:20,2020-01-31 00:00:00,ATM BRI,Transfer,5326590001246103,00252 -- Jeneponto,90110471,,,Alfera Dyah Pangestu,081241313888,,,20,90138706,,Alfera Dyah Pangestu


## Zendesk

In [41]:
import pandas as pd
import openpyxl 


pd.set_option('display.max_columns', None)
# path=r"C:\Users\maste\Downloads\Data Zendesk.csv"
path=r"C:\Users\maste\Downloads\RPA_Report_1_05072024_0802.xlsx"
df=pd.read_excel(path)


# Change the datetime format

date_columns= ['Requester created - Timestamp','Ticket created - Timestamp','Ticket solved - Timestamp']
for col in date_columns:
    df[col] = pd.to_datetime(df[col]).dt.strftime('%Y-%m-%d %H:%M:%S.%f')

# Remove the Ticket Column
df=df.drop('Tickets',axis=1)


#Filter out all data with values "Instagram Direct"
filtered_df = df[df['Ticket channel'] == 'Instagram Direct']
# 256 rows

#Remove all lines without "Instagram Direct"
filtered_df2 = df[df['Ticket channel'] != 'Instagram Direct']


#Define Mapping sets
mapping_sets = [
    {'Any channel': 'Instagram'}
]

# Merge all mapping sets into a single dictionary
mapping = {}
for mapping_set in mapping_sets:
    mapping.update(mapping_set)

# Function to apply mapping
def apply_mapping(value):
    return mapping.get(value, value)
df = filtered_df2.applymap(apply_mapping)
df

# Instagram Direct take out
# Any channel = Instagram

# remove all rows with Instagram Direct value


  warn("Workbook contains no default style, apply openpyxl's default")
  df = filtered_df2.applymap(apply_mapping)


Unnamed: 0,Ticket ID,Ticket channel,Assignee ID,Assignee name,Requester ID,Requester name,Ticket subject,Requester created - Timestamp,Ticket created - Timestamp,Ticket solved - Timestamp
0,3777302,Facebook,19918762073497,Agent Socmed 5,32077763614745,E Sae,bikin brimo yg kblokir bsa gk y?,2024-05-06 00:02:21.000000,2024-05-06 00:02:21.000000,2024-05-06 00:05:22.000000
2,3777304,Instagram,19918762073497,Agent Socmed 5,32077755960985,nurul_alamin,[IGDM] Kk cara ganti no HP di aplikas... - @nu...,2024-05-06 00:02:29.000000,2024-05-06 00:02:29.000000,2024-05-06 05:43:13.000000
3,3777306,Twitter Direct Message,405258199354,Contact BRI,32077776751641,namaku,Conversation with namaku,2024-05-06 00:03:51.000000,2024-05-06 00:03:52.000000,2024-05-06 14:46:00.000000
4,3777309,Facebook,19918762073497,Agent Socmed 5,32077861374233,Tok Bagus,Menurut gw mirip ni 2 orangðŸ—¿,2024-05-06 00:07:13.000000,2024-05-06 00:07:13.000000,2024-05-06 00:07:39.000000
5,3777311,Twitter Direct Message,405258199354,Contact BRI,32077902878361,âœ°,Conversation with âœ°,2024-05-06 00:07:56.000000,2024-05-06 00:07:56.000000,2024-05-06 12:37:50.000000
...,...,...,...,...,...,...,...,...,...,...
1181,3779914,Twitter,405257335633,Agent Sosmed 3,32118165205913,Hana Dwi,Brimo error gimana ya? Ga bisa ke buka https:/...,2024-05-07 04:54:12.000000,2024-05-07 04:54:20.000000,2024-05-07 04:56:37.000000
1182,3779935,Facebook,405303061074,Agent Sosmed 2,32118841105689,Ahmad Yani,Klw bisa saat mengambil atau memotong Uang di...,2024-05-07 05:41:57.000000,2024-05-07 05:41:57.000000,2024-05-07 05:51:30.000000
1183,3779940,Instagram,405303061074,Agent Sosmed 2,32119016699161,muhammadakbardurimalang,[IGDM] Selamat pagi - @muhammadakbardurimal...,2024-05-07 05:50:27.000000,2024-05-07 05:50:27.000000,2024-05-07 05:53:39.000000
1184,3779942,Twitter,405257335633,Agent Sosmed 3,32119084456601,Kang TimbulðŸ‡®ðŸ‡©ðŸ‡µðŸ‡¸,@promo_BRI Kalau uang hilang bagaimana?,2024-05-07 05:54:21.000000,2024-05-07 05:54:21.000000,2024-05-07 05:57:39.000000
