In [2]:
import pandas as pd
import re
import urllib.parse

In [6]:
df = pd.read_csv('C:\\Users\\Putu Mahendrayana\\Documents\\Dark_Network\\Data\\RAW.csv')
df.info

<bound method DataFrame.info of             Character Name                                       Image Source  \
0             Selma_Ahrens  https://static.wikia.nocookie.net/dark-netflix...   
1            Helene_Albers  https://static.wikia.nocookie.net/dark-netflix...   
2           Hermann_Albers  https://static.wikia.nocookie.net/dark-netflix...   
3            Eva%27s_World  https://static.wikia.nocookie.net/dark-netflix...   
4            Eva%27s_World  https://static.wikia.nocookie.net/dark-netflix...   
..                     ...                                                ...   
72           Eva%27s_World  https://static.wikia.nocookie.net/dark-netflix...   
73                 Unknown  https://static.wikia.nocookie.net/dark-netflix...   
74  Bernadette_W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...   
75      Hannah_W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...   
76      Torben_W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...

In [7]:
# Function to extract character name from the link
def extract_character_name(link):
    match = re.search(r'/wiki/([^/]+)', link)
    if match:
        return match.group(1).replace('_', ' ')
    return None

# Apply the function to the 'link' column and create a new 'Character Name' column
df['Character Name'] = df['Wiki Link'].apply(extract_character_name)

# Assign NodeIDs to each character row
df['NodeID'] = range(1, len(df) + 1)

# Display the modified DataFrame
print(df)

            Character Name                                       Image Source  \
0             Selma Ahrens  https://static.wikia.nocookie.net/dark-netflix...   
1            Helene Albers  https://static.wikia.nocookie.net/dark-netflix...   
2           Hermann Albers  https://static.wikia.nocookie.net/dark-netflix...   
3     Aleksander Tiedemann  https://static.wikia.nocookie.net/dark-netflix...   
4        Bartosz Tiedemann  https://static.wikia.nocookie.net/dark-netflix...   
..                     ...                                                ...   
72          Ulrich Nielsen  https://static.wikia.nocookie.net/dark-netflix...   
73                 Unknown  https://static.wikia.nocookie.net/dark-netflix...   
74  Bernadette W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...   
75      Hannah W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...   
76      Torben W%C3%B6ller  https://static.wikia.nocookie.net/dark-netflix...   

                           

In [9]:
# Function to decode URL-encoded characters and replace underscores with spaces
def decode_and_clean_name(link):
    decoded_name = urllib.parse.unquote(link)
    cleaned_name = decoded_name.replace('_', ' ')
    return cleaned_name

# Apply the function to the 'Character Name' column
df['Character Name'] = df['Character Name'].apply(decode_and_clean_name)


In [12]:
# Function to modify the family relations text
def modify_family_relations(relations):
    if pd.isna(relations):
        return relations  # Return NA value as is
    
    modified_relations = []
    segments = re.split(r'\)(?=\S)', relations)
    
    for segment in segments:
        parts = segment.split('(')
        if len(parts) == 2:
            name = parts[0].strip().replace('†', '')  # Remove † symbol
            role = parts[1].strip()
            modified_relations.append(f"{name}_{role}")  # Remove the space after underscore
        else:
            modified_relations.append(segment.strip())
    return ','.join(modified_relations)
# Apply the function to the 'Family Relations' column
df['Modified Family Relations'] = df['Family Relations'].apply(modify_family_relations)
# Display the modified DataFrame
df.info

<bound method DataFrame.info of           Character Name                                       Image Source  \
0           Selma Ahrens  https://static.wikia.nocookie.net/dark-netflix...   
1          Helene Albers  https://static.wikia.nocookie.net/dark-netflix...   
2         Hermann Albers  https://static.wikia.nocookie.net/dark-netflix...   
3   Aleksander Tiedemann  https://static.wikia.nocookie.net/dark-netflix...   
4      Bartosz Tiedemann  https://static.wikia.nocookie.net/dark-netflix...   
..                   ...                                                ...   
72        Ulrich Nielsen  https://static.wikia.nocookie.net/dark-netflix...   
73               Unknown  https://static.wikia.nocookie.net/dark-netflix...   
74     Bernadette Wöller  https://static.wikia.nocookie.net/dark-netflix...   
75         Hannah Wöller  https://static.wikia.nocookie.net/dark-netflix...   
76         Torben Wöller  https://static.wikia.nocookie.net/dark-netflix...   

                   

In [13]:
# Create an empty list to store rows
rows = []

# Iterate through each row
for index, row in df.iterrows():
    source = row['Character Name']
    modified_relations = row['Modified Family Relations']
    
    # Skip processing if modified_relations is NA
    if pd.isna(modified_relations):
        continue
    
    target_relations = modified_relations.split(',')
    
    # Iterate through each relation in the 'Modified Family Relations'
    for relation in target_relations:
        # Split the relation by underscore (_)
        relation_parts = relation.split('_')
        
        # Check if the relation is in the correct format
        if len(relation_parts) == 2:
            target, relation_type = relation_parts
            rows.append([source, target, relation_type])

# Create a new DataFrame with the extracted relationships
new_df = pd.DataFrame(rows, columns=['Source Node', 'Target Node', 'Relation'])

# Display the new DataFrame
print(new_df)

           Source Node        Target Node  \
0        Helene Albers  Katharina Nielsen   
1        Helene Albers     Ulrich Nielsen   
2        Helene Albers     Magnus Nielsen   
3        Helene Albers     Martha Nielsen   
4        Helene Albers     Mikkel Nielsen   
..                 ...                ...   
585  Bernadette Wöller      Peter Doppler   
586  Bernadette Wöller      Hannah Wöller   
587      Hannah Wöller      Torben Wöller   
588      Hannah Wöller       Benni Wöller   
589      Torben Wöller       Unborn child   

                                              Relation  
0                                             daughter  
1    son-in-law/great-great-great-grandson/great-gr...  
2                                             grandson  
3                                        granddaughter  
4                                             grandson  
..                                                 ...  
585                        Boyfriend in original world  
586

In [14]:
new_df['Relation'] = new_df['Relation'].str.replace(')', '')

In [15]:
# Split the 'Relation' column and create new rows
new_rows = []

for index, row in new_df.iterrows():
    source = row['Source Node']
    target = row['Target Node']
    relations = row['Relation'].split('/')
    
    for relation in relations:
        new_rows.append([source, target, relation.strip()])

# Create a new DataFrame with the split relations
df_final = pd.DataFrame(new_rows, columns=['Source Node', 'Target Node', 'Relation'])
df_final.info

<bound method DataFrame.info of            Source Node        Target Node                          Relation
0        Helene Albers  Katharina Nielsen                          daughter
1        Helene Albers     Ulrich Nielsen                        son-in-law
2        Helene Albers     Ulrich Nielsen        great-great-great-grandson
3        Helene Albers     Ulrich Nielsen  great-great-great-great-grandson
4        Helene Albers     Magnus Nielsen                          grandson
..                 ...                ...                               ...
731  Bernadette Wöller      Hannah Wöller       sister-in-law Unborn nephew
732  Bernadette Wöller      Hannah Wöller                             niece
733      Hannah Wöller      Torben Wöller                           husband
734      Hannah Wöller       Benni Wöller                     sister-in-law
735      Torben Wöller       Unborn child                    Original World

[736 rows x 3 columns]>

In [17]:
df_final.to_csv('dark_output.csv', index=False)

OSError: Cannot save file into a non-existent directory: 'Data'