In [4]:
import pandas as pd

In [6]:
# Read the 'transpose_categories' sheet; the first spreadsheet row supplies the headers.
# The path uses forward slashes so the notebook also runs on macOS/Linux
# (Windows accepts them too); backslashes are only a separator on Windows.
print("Reading Excel sheet 'transpose_categories'...")
df = pd.read_excel(
    '../acts_and_miracles/acts_and_miracles_preprep.xlsx',
    sheet_name='transpose_categories',
    header=0,
)

# Report what was loaded before any columns are renamed or reshaped
print(f"Original shape: {df.shape}")
print(f"Original columns: {list(df.columns)}")

Reading Excel sheet 'transpose_categories'...
Original shape: (678, 11)
Original columns: ['EventID', 'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Category 5', 'Category 6', 'Category 7', 'Category 8', 'Category 9', 'Category 10']


In [7]:
# Normalise the headers positionally: the first column becomes 'EventID' and
# every remaining column becomes 'Category 1', 'Category 2', ...
new_columns = ['EventID'] + [f'Category {i}' for i in range(1, len(df.columns))]
df.columns = new_columns
print(f"Renamed columns: {list(df.columns)}")

# Get all category column names (everything except EventID)
category_columns = [col for col in df.columns if col.startswith('Category')]
print(f"Category columns to transpose: {category_columns}")

# Transpose (melt) the category columns into rows.
# melt emits all 'Category 1' rows first, then all 'Category 2' rows, and so on.
print("Transposing category columns into rows...")
melted_df = pd.melt(
    df,
    id_vars=['EventID'],
    value_vars=category_columns,
    var_name='CategoryType',  # This will hold "Category 1", "Category 2", etc.
    value_name='Category'     # This will hold the actual category values
)

# Keep only rows that carry a real category: not null and not blank/whitespace
has_category = (
    melted_df['Category'].notna()
    & (melted_df['Category'].astype(str).str.strip() != '')
)
transposed_df = melted_df[has_category]

# Keep only EventID and Category, then group each event's rows together.
# kind='stable' is required: the default quicksort is not stable, so it may
# shuffle the Category 1..N order within an event. A stable sort on EventID
# alone preserves melt's column order for ties (sorting on CategoryType as a
# string would wrongly place 'Category 10' before 'Category 2').
final_df = (
    transposed_df[['EventID', 'Category']]
    .sort_values('EventID', kind='stable')
    .reset_index(drop=True)
)

print(f"Transposed shape: {final_df.shape}")
print(f"Final columns: {list(final_df.columns)}")



Renamed columns: ['EventID', 'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Category 5', 'Category 6', 'Category 7', 'Category 8', 'Category 9', 'Category 10']
Category columns to transpose: ['Category 1', 'Category 2', 'Category 3', 'Category 4', 'Category 5', 'Category 6', 'Category 7', 'Category 8', 'Category 9', 'Category 10']
Transposing category columns into rows...
Transposed shape: (1437, 2)
Final columns: ['EventID', 'Category']


In [8]:
# Display the long-format result: one row per (EventID, Category) pair.
final_df

Unnamed: 0,EventID,Category
0,1,Creation
1,2,Speech
2,2,Creation
3,3,Creation
4,3,Speech
...,...,...
1432,691,Fulfilment
1433,691,Recovery
1434,691,Victory
1435,692,Fulfilment


In [12]:
# Spot-check a single event: show every category row recorded for EventID 182.
final_df.loc[final_df['EventID'] == 182]

Unnamed: 0,EventID,Category
370,182,Theophany
371,182,Trumpet
372,182,Speech
373,182,Smoke
374,182,Presence
375,182,Lightning
376,182,Fire
377,182,Thunder
378,182,Cloud
379,182,Warning


In [13]:
# Export to CSV
# Build the destination once from output_filename so the file that is written
# and the path that is reported cannot drift apart. Forward slashes keep the
# path valid on macOS/Linux as well as Windows.
output_filename = 'transposed_categories.csv'
output_path = f'../acts_and_miracles/{output_filename}'
final_df.to_csv(output_path, index=False)

print(f"\nProcessing complete! File saved as '{output_path}'")
print("\nPreview of the result:")
print(final_df.head(15))

# Show some statistics
unique_events = final_df['EventID'].nunique()
total_categories = len(final_df)
# Guard against an empty frame so the average never divides by zero
avg_categories_per_event = total_categories / unique_events if unique_events > 0 else 0

print("\nStatistics:")
print(f"Unique EventIDs: {unique_events}")
print(f"Total category entries: {total_categories}")
print(f"Average categories per EventID: {avg_categories_per_event:.2f}")

# Show example of categories for first few EventIDs (in order of first appearance)
print("\nSample data - Categories for first few EventIDs:")
for event_id in final_df['EventID'].unique()[:3]:
    categories = final_df.loc[final_df['EventID'] == event_id, 'Category'].tolist()
    print(f"EventID {event_id}: {categories}")


Processing complete! File saved as 'transposed_categories.csv'

Preview of the result:
    EventID  Category
0         1  Creation
1         2    Speech
2         2  Creation
3         3  Creation
4         3    Speech
5         4  Creation
6         4    Speech
7         5  Creation
8         5    Speech
9         6    Speech
10        6  Creation
11        7    Speech
12        7  Creation
13        8  Creation
14        8    Speech

Statistics:
Unique EventIDs: 678
Total category entries: 1437
Average categories per EventID: 2.12

Sample data - Categories for first few EventIDs:
EventID 1: ['Creation']
EventID 2: ['Speech', 'Creation']
EventID 3: ['Creation', 'Speech']
