## Detail Column of Data 2023 for BRICARE

In [1]:
import pandas as pd

def process_text_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    entries = []
    current_entry = []
    current_ticket_id = None

    for line in lines:
        if line.startswith('TTB'):
            if current_entry:  # If there's collected content, append it with the Ticket ID
                entries.append((current_ticket_id, '\n'.join(current_entry)))
                current_entry = []
            # Extract Ticket ID and the part after the third comma
            parts = line.split(',', 3)
            if len(parts) > 3:
                current_ticket_id = parts[0]  # Store the Ticket ID
                current_entry.append(parts[3].strip())  # Start collecting the entry content
            continue
        current_entry.append(line.strip())

    # Add the last collected entry if any
    if current_entry:
        entries.append((current_ticket_id, '\n'.join(current_entry)))

    return entries

# Path to your text file
file_path = r"C:\Users\maste\Downloads\bricare_case_januari2023_2_details.txt"
processed_data = process_text_data(file_path)

# Create a DataFrame from the processed data
df_final = pd.DataFrame(processed_data, columns=['Ticket ID', 'Content'])

# Adjust the first entry in the Content column if necessary
if df_final.iloc[0]['Ticket ID'] and df_final.iloc[0]['Content'].startswith(df_final.iloc[0]['Ticket ID']):
    # Remove the Ticket ID and any two characters before it from the content
    df_final.at[0, 'Content'] = df_final.iloc[0]['Content'][len(df_final.iloc[0]['Ticket ID'])+2:]

# Save the DataFrame to a CSV file
# df_final.to_csv('exp_5.csv', index_label='Index')

df_final.head()  # Display the first few rows to check the output


Unnamed: 0,Ticket ID,Content
0,,"﻿TTB000043833835,8701,Blokir Kartu ATM karena ..."
1,TTB000043833951,#BRILINKMOB\n\nDATA outlet BRILINK\nKode Outle...
2,TTB000043833734,"#CALL TERPUTUS\n\nif ch call back ,layanan IB ..."
3,TTB000043833965,Nasabah gagal melakukan transaksi tarik tunai ...
4,TTB000043833833,"ch infokan melakukan registrasi brimo, namun m..."


## Modify the first line

In [5]:
import pandas as pd

# Load the dataset
data = pd.read_csv('D:\dataquality\exp_5.csv', encoding='utf-8-sig')  # Handles any encoding issues like BOM

# Initialize a list to store processed data
processed_data = []

# Process the first line
first_line = data.loc[0, 'Content']
ticket_id_first_line = first_line.split(',')[0]
content_after_third_comma_first_line = ','.join(first_line.split(',')[3:]) if len(first_line.split(',')) > 3 else None
processed_data.append({'Ticket ID': ticket_id_first_line, 'Content': content_after_third_comma_first_line})

# Process the rest of the lines normally
for index, row in data.iterrows():
    if index == 0:  # Skip the first line as it is already processed
        continue
    if pd.notna(row['Ticket ID']) and row['Ticket ID'].startswith('TTB'):
        content_parts = row['Content'].split(',')
        processed_content = ','.join(content_parts[3:]) if len(content_parts) > 3 else row['Content']
    else:
        processed_content = row['Content']  # Keep the original content if not starting with TTB
    processed_data.append({'Ticket ID': row['Ticket ID'], 'Content': processed_content})

# Create a DataFrame from the processed data
result_df = pd.DataFrame(processed_data)


# result_df.to_csv('exp_6.csv', index=False)

result_df


Unnamed: 0,Ticket ID,Content
0,﻿TTB000043833835,Nasabah mengajukan pemblokiran kartu ATM BRI\n...
1,TTB000043833951,#BRILINKMOB\n\nDATA outlet BRILINK\nKode Outle...
2,TTB000043833734,", nomor tt\n\n\n\nNasabah mengajukan pemblok..."
3,TTB000043833965,Nasabah gagal melakukan transaksi tarik tunai ...
4,TTB000043833833,"ch infokan melakukan registrasi brimo, namun m..."
...,...,...
364802,TTB000044335239,"Saldo Berkurang,Nasabah gagal melakukan transa..."
364803,TTB000044335249,Nasabah gagal melakukan transaksi tarik tunai ...
364804,TTB000044335135,"\nADDR 2, ,JL RAYA MERDEKA NO 110 ,\nAD..."
364805,TTB000044335255,"Saldo Berkurang,Nasabah gagal melakukan transa..."


## Data before 2023

In [6]:
import pandas as pd


column_list = [
    "Ticket_ID",  
    "Call_Type_ID",  
    "Call_Type", 
    "Create_Date",  
    "gateway",  
    "Jenis_Laporan",  
    "Nama_Nasabah",  
    "No_Rekening", 
    "Nominal",  
    "status",  
    "TanggalClosed", 
    "tanggalTransaksi",  
    "Chanel",  
    "Fitur",  
    "Nomor_Kartu", 
    "user_group",  
    "assgined_to",  
    "attachment_done",  
    "email",  
    "full_name",  
    "no_telepon",  
    "approver_login",  
    "approver_name",  
    "SLAResolution",  
    "submitter_login_id",  
    "submitter_user_group", 
    "user_login_name"  
]

path=r"C:\Users\maste\Downloads\BRICARE_25042024 masking.csv"
data=pd.read_csv(path, delimiter=';')

# Convert Column1 to string if not already, and apply the regex filter for "TTB" followed by numbers
data['Column1'] = data['Column1'].astype(str)
data_cleaned = data[data['Column1'].str.match(r'TTB\d+')]

# Ensure Column2 is an integer type and contains exactly four digits
data_cleaned['Column2'] = data_cleaned['Column2'].astype(str)
data_cleaned = data_cleaned[data_cleaned['Column2'].str.match(r'^\d{4}$')]

# Ensure all entries in Column4 can be converted to datetime and filter out those that can't
data_cleaned['Column4'] = pd.to_datetime(data_cleaned['Column4'], errors='coerce')
data_cleaned = data_cleaned.dropna(subset=['Column4'])

# Drop Column 28-32
data_to_drop=['Column28','Column29','Column30','Column31','Column32']
data_cleaned=data_cleaned.drop(columns=data_to_drop)


# Display the cleaned data again
# data_to_show=['Column28','Column29','Column30','Column31','Column32']
# data_cleaned=data_cleaned[data_to_show]

# Check all column containing NaN
# data_cleaned=data_cleaned.dropna()
# data_cleaned

# data_cleaned.columns = column_list[:len(data_cleaned.columns)]
    
if len(data_cleaned.columns) <= len(column_list):
    data_cleaned.columns = column_list[:len(data_cleaned.columns)]




data_cleaned



# df=df.iloc[:6]
# df.to_csv("not_cleanfordate.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['Column2'] = data_cleaned['Column2'].astype(str)


Unnamed: 0,Ticket_ID,Call_Type_ID,Call_Type,Create_Date,gateway,Jenis_Laporan,Nama_Nasabah,No_Rekening,Nominal,status,...,attachment_done,email,full_name,no_telepon,approver_login,approver_name,SLAResolution,submitter_login_id,submitter_user_group,user_login_name
0,TTB000026204763,8425,Pen-delete-an Status Registrasi Layanan yang A...,2020-01-01 07:19:37,Phone,Maintenance,Arif Budi Saputra,021234567890123,0.00,Closed,...,,,Really Artha Ully Manik,081234567890,,,20,90136590,,Really Artha Ully Manik
1,TTB000026204728,8405,Kartu ATM BRI Tertelan di MESIN ATM,2020-01-01 07:19:30,Phone,Information,Arif Budi Saputra,021234567890123,0.00,Closed,...,,,DELLA LARASSARI,081234567890,90022934,Adhi Nitidharma,20,90135196,,DELLA LARASSARI
2,TTB000026204747,8202,Informasi Product Banking,2020-01-01 07:19:27,Phone,Information,Arif Budi Saputra,021234567890123,741700.00,Closed,...,,,Kartika Fitriani,081234567890,,,20,90141079,LCC-CCTCALL,Kartika Fitriani
4,TTB000026204659,8812,Nasabah BRI gagal tarik tunai & terdebet di AT...,2020-01-01 07:00:34,Phone,Information,Arif Budi Saputra,021234567890123,1000000.00,Closed,...,,,AMELIA RAHMADANI,081234567890,00000723,Ismail,10,60443,LCC-CCTCALL,AMELIA RAHMADANI
5,TTB000026204577,8202,Informasi Product Banking,2020-01-01 07:09:29,Phone,Information,Arif Budi Saputra,021234567890123,500000.00,Closed,...,,bacuelkueh@gmail.com,Amalia Fitriana,081234567890,,,20,60422,LCC-CCTCALL,Amalia Fitriana
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375778,TTB000026715685,8411,Salah Transfer antar BRI,2020-01-31 19:32:26,Phone,Complain,HERIWATI,546501003139532,100001.00,Closed,...,0,,ADE SUTISNA,082387480456,,,20,90135194,LCC-CCTCALL,ADE SUTISNA
375779,TTB000026713843,8411,Salah Transfer antar BRI,2020-01-31 17:28:34,Phone,Complain,MUTIAH,609401002283508,1950000.00,Closed,...,,,Umar Fahruddin Pratama,082136107896,,,20,90135689,,Umar Fahruddin Pratama
375780,TTB000026714586,7700,Komplain Transaksi Kartu Kredit tidak di akui,2020-01-31 18:24:32,Phone,Complain,MULIYA HARDIYANTO,,0.00,Closed,...,,,Cintia Fadila,05264513380,,,67,90123773,LCC-CCTCALL,Cintia Fadila
375782,TTB000026713292,8411,Salah Transfer antar BRI,2020-01-31 16:56:02,Phone,Complain,S I M O N,064201002986507,9650000.00,Closed,...,0,,Alfera Dyah Pangestu,081241313888,,,20,90138706,,Alfera Dyah Pangestu
