In [3]:
import pandas as pd

# Read the CSV file into a DataFrame
df = pd.read_csv("data/WorkDetails.csv")

# 01. Print records count
records_count = len(df)
print("Records Count:", records_count)

# 02. Remove duplicate records
df = df.drop_duplicates()

# 03. Convert all text data to uppercase
df = df.apply(lambda x: x.astype(str).str.upper() if x.name in ['Estate', 'Division', 'Work'] else x)

# 04. Merge Year, Month, and Day into a single column called DateofWork
df['DateofWork'] = df[['Year', 'Month', 'Day']].apply(lambda x: f"{x['Month']}/{x['Day']}/{x['Year']}", axis=1)

# 05. Add an Index column as the first column
df.insert(0, 'Index', range(1, 1 + len(df)))

# 06. Count duplicate records based on DateofWork, Division, and EmpCode
duplicate_records_count = df[df.duplicated(subset=['DateofWork', 'Division', 'EmpCode'])].shape[0]

# 07. Print the Index list for duplicate DateofWork, Division, and EmpCode
duplicate_indexes = df[df.duplicated(subset=['DateofWork', 'Division', 'EmpCode'])].index.tolist()
print("Index List for Duplicate DateofWork, Division, and EmpCode:", duplicate_indexes)

# 08. Save the output to WorkDetails_modified.csv
df.to_csv("data/WorkDetails_modified.csv", index=False)


Records Count: 1048575
Index List for Duplicate DateofWork, Division, and EmpCode: [10896, 13000, 19367, 19981, 21348, 22659, 29731, 35051, 36500, 38718, 41005, 48040, 50769, 54873, 64456, 82218, 84476, 88804, 90289, 91099, 91753, 93662, 95545, 96143, 98203, 104406, 107321, 108204, 108271, 108496, 110724, 112904, 114711, 117359, 119442, 121679, 123502, 123514, 123557, 124744, 125363, 125591, 125660, 126181, 127431, 127634, 128371, 128428, 128644, 128800, 129498, 135165, 136681, 138789, 139795, 140318, 140349, 140658, 140830, 142500, 144038, 144252, 144872, 145234, 145460, 147074, 147207, 150365, 151208, 151605, 154732, 154953, 155710, 155789, 156784, 157110, 158210, 159990, 160692, 161413, 162027, 162573, 163004, 164128, 165121, 166794, 167965, 168944, 169143, 169214, 169722, 169912, 171355, 172016, 173232, 173253, 173286, 173374, 174591, 175298, 175715, 176764, 176765, 178136, 178775, 179156, 179216, 179310, 179383, 179873, 181603, 181943, 182941, 182942, 182996, 183775, 184037, 18425