In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [7]:
# Load the spells data
spells_df = pd.read_csv('spells.csv', sep=None, engine='python', encoding='utf-8-sig')
spell_incantations = spells_df['Incantation'].dropna().str.strip().str.lower().tolist()

# List of dialogue files, with movie numbers
dialogue_files = {
    'hp1.csv': 1,
    'hp2.csv': 2,
    'hp3.csv': 3
}

# List to gather all data for a combined DataFrame
all_data = []

# Process each dialogue file
for file, movie_number in dialogue_files.items():
    hp_dialogue_df = pd.read_csv(file, sep=None, engine='python', encoding='utf-8-sig')

    # Process each line in the dialogue file
    for index, row in hp_dialogue_df.iterrows():
        line_number = index + 1  # Line number (1-indexed)
        sentence = row['Sentence'].lower()  # Convert sentence to lowercase
        caster = row['Character']  # Character speaking the line
        
        # Check if any known spell incantation is in the sentence
        incantation = next((spell for spell in spell_incantations if spell in sentence), None)
        
        if incantation:
            victim = None
            if " you " in sentence:
                victim = "unspecified target"
            elif "Hermione" in sentence:
                victim = "Hermione"
            elif "Harry" in sentence:
                victim = "Harry"
            elif "Ron" in sentence:
                victim = "Ron"
            
            # Append movie data with movie number
            all_data.append((movie_number, line_number, caster, incantation, victim))

# Create a single DataFrame with an additional 'movie_number' column
combined_df = pd.DataFrame(all_data, columns=["movie_number", "line_number", "caster", "incantation", "victim"])

# Display the combined DataFrame
print(combined_df)

# Optionally, save to CSV
combined_df.to_csv('Combined_Extracted_Spell_Data_With_Movie_Number.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'hp1.csv'

In [None]:
#turn df to csv and download, uncomment to download what u want
#df_og_version.to_csv("og_version.csv", sep=',', encoding='utf-8')

#df_taylors_version.to_csv("taylors_version.csv", sep=',', encoding='utf-8')

#df_top_5_all_albums_sorted.to_csv("top_5.csv", sep=',', encoding='utf-8')

#df_top_5_all_songs.to_csv("top_5_general.csv", sep=',', encoding='utf-8')

combined_df.to_csv("movies1_3.csv", sep=',', encoding='utf-8')


In [None]:
import pandas as pd

# Load the spells data
spells_df = pd.read_csv('spells.csv', sep=None, engine='python', encoding='utf-8-sig')
spell_incantations = spells_df['Incantation'].dropna().str.strip().str.lower().tolist()

# Load your single dialogue file
dialogue_file_path = 'dialogue.csv'  # Replace with the correct file path
dialogue_df = pd.read_csv(dialogue_file_path, sep=None, engine='python', encoding='utf-8-sig')

# List to store all collected data
all_data = []

# Iterate over each row in the dialogue DataFrame
for index, row in dialogue_df.iterrows():
    line_number = row['Dialogue_ID']  # Get the dialogue ID
    sentence = row['Dialogue'].lower()  # Convert the dialogue to lowercase
    caster = row['Character_ID']  # The ID of the character speaking

    # Check if any known spell incantation is present in the sentence
    incantation = next((spell for spell in spell_incantations if spell in sentence), None)

    if incantation:
        # Identify potential victim based on keywords in the sentence
        victim = None
        if " you " in sentence:
            victim = "unspecified target"
        elif "hermione" in sentence:
            victim = "Hermione"
        elif "harry" in sentence:
            victim = "Harry"
        elif "ron" in sentence:
            victim = "Ron"

        # Append the collected data to the list
        all_data.append((row['Chapter_ID'], line_number, caster, incantation, victim))

# Create a DataFrame from the collected data
combined_df = pd.DataFrame(all_data, columns=["Chapter_ID", "Dialogue_ID", "Character_ID", "Incantation", "Victim"])

# Display the combined DataFrame
print(combined_df)


     Chapter_ID  Dialogue_ID  Character_ID       Incantation  \
0             7          160            43            finite   
1             9          186            26              pack   
2            10          213             2              pack   
3            10          226             3     oculus reparo   
4            14          319             1           unknown   
..          ...          ...           ...               ...   
150         223         7160            99     avada kedavra   
151         223         7161             1      expelliarmus   
152         225         7273             6  expecto patronum   
153         231         7396             1         confringo   
154         232         7406             1         confringo   

                 Victim  
0                  None  
1                  None  
2    unspecified target  
3    unspecified target  
4                   Ron  
..                  ...  
150                None  
151                None

In [None]:
# Load the character data
character_file_path = 'characters.csv'  # Replace with the correct file path
character_df = pd.read_csv(character_file_path, sep=None, engine='python', encoding='utf-8-sig')

print(character_df)

     Character_ID    Character_Name                Species  Gender  \
0               1      Harry Potter                  Human    Male   
1               2       Ron Weasley                  Human    Male   
2               3  Hermione Granger                  Human  Female   
3               4  Albus Dumbledore                  Human    Male   
4               5     Rubeus Hagrid  Half-Human/Half-Giant    Male   
..            ...               ...                    ...     ...   
161           162            Waiter                    NaN     NaN   
162           163             Boy 2                    NaN     NaN   
163           164             Crowd                    NaN     NaN   
164           165       Gryffindors                    NaN     NaN   
165           166        Professors                    NaN     NaN   

          House              Patronus Wand (Wood)         Wand (Core)  
0    Gryffindor                  Stag       Holly     Phoenix Feather  
1    Gryffindor

In [None]:
# Merge combined_df with character_df to add Character_Name and House
combined_df = combined_df.merge(
    character_df[['Character_ID', 'Character_Name', 'House']],
    left_on='Character_ID',
    right_on='Character_ID',
    how='left'
)

# Drop the redundant 'Character ID' column from the character data
combined_df.drop(columns=['Character_ID'], inplace=True)

# Rename columns for clarity
combined_df.rename(columns={'Character_Name': 'Caster'}, inplace=True)


# Display the updated combined DataFrame
print(combined_df)

     Chapter_ID  Dialogue_ID       Incantation              Victim  \
0             7          160            finite                None   
1             9          186              pack                None   
2            10          213              pack  unspecified target   
3            10          226     oculus reparo  unspecified target   
4            14          319           unknown                 Ron   
..          ...          ...               ...                 ...   
150         223         7160     avada kedavra                None   
151         223         7161      expelliarmus                None   
152         225         7273  expecto patronum                 Ron   
153         231         7396         confringo                None   
154         232         7406         confringo                None   

                 Caster       House  
0    Garrick Ollivander   Ravenclaw  
1         Molly Weasley  Gryffindor  
2           Ron Weasley  Gryffindor  
3      

In [None]:
combined_df = combined_df[combined_df['Incantation'] != 'unknown']
combined_df = combined_df[combined_df['Incantation'] != 'none']



In [None]:
# Group by Character_Name and count the number of spells cast
spell_counts = combined_df.groupby('Caster').size().reset_index(name='Spell_Count')

# Sort the results in descending order of Spell_Count
spell_counts = spell_counts.sort_values(by='Spell_Count', ascending=False)

# Display the grouped and sorted DataFrame
print(spell_counts)

                 Caster  Spell_Count
14         Harry Potter           49
15     Hermione Granger           30
27          Ron Weasley           13
25          Remus Lupin            7
28        Severus Snape            5
22   Neville Longbottom            5
12    Gilderoy Lockhart            4
1      Albus Dumbledore            4
6          Draco Malfoy            3
16         James Potter            2
29         Sirius Black            2
18        Luna Lovegood            2
31            Voldemort            2
7       Filius Flitwick            2
10       George Weasley            2
2                   All            2
8          Fred Weasley            2
4   Bellatrix Lestrange            2
5      Dolores Umbridge            2
24           Professors            1
30           Tom Riddle            1
3        Arthur Weasley            1
26        Rolanda Hooch            1
13        Gregory Goyle            1
23        Parvati Patil            1
11               Ghosts            1
2

# Web scraping Wiki pages for spell data:

In [2]:
from bs4 import BeautifulSoup
import requests

In [3]:
url = "https://harrypotter.fandom.com/wiki/Killing_Curse"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

table_rows = soup.findAll('table')[2].findAll('tr')[1:]

caster_value, victim_value, date_value, notes_value = None, None, None, None
caster_span, victim_span, date_span, notes_span = 0, 0, 0, 0
rows_data = []

for row in table_rows:
    cells = row.find_all('td')
    if not cells:
        continue  
    
    row_caster, row_victim, row_date, row_notes = None, None, None, None
    caster_first_span, victim_first_span, date_first_span = False, False, False

    if caster_span == 0:
        if cells[0].has_attr('rowspan'):
            caster_first_span = True
            caster_span = int(cells[0]['rowspan']) - 1
            row_caster = cells[0].get_text(strip=True)
        else:
            row_caster = cells[0].get_text(strip=True)
            caster_first_span = False
    else:
        caster_span -= 1
        row_caster = caster_value
        caster_first_span = False

    if caster_span != 0:
        victim_index = 0
    else:
        victim_index = 1
        
    victim_index = 1
    if victim_span == 0:
        if len(cells) > 1 and cells[victim_index].has_attr('rowspan'):
            victim_span = int(cells[victim_index]['rowspan']) - 1
            row_victim = cells[victim_index].get_text(strip=True)
        elif len(cells) > 1:
            row_victim = cells[victim_index].get_text(strip=True)
    else:
        victim_span -= 1
        row_victim = victim_value
 
    date_index = 2
    if date_span == 0:
        if len(cells) > 2 and cells[date_index].has_attr('rowspan'):
            date_span = int(cells[date_index]['rowspan']) - 1
            row_date = cells[date_index].get_text(strip=True)
        elif len(cells) > 2:
            row_date = cells[date_index].get_text(strip=True)
    else:
        date_span -= 1
        row_date = date_value

    caster_value = row_caster if row_caster is not None else caster_value
    victim_value = row_victim if row_victim is not None else victim_value
    date_value = row_date if row_date is not None else date_value
   
    rows_data.append({
        "caster": row_caster,
        "victim": row_victim,
        "date": row_date,
        "spell": "Avada Kedavra"
    })

wiki_spell_data = pd.DataFrame(rows_data)

print(wiki_spell_data)


                                               caster  \
0                                           San Bakar   
1                                     Victor Rookwood   
2                                    Sebastian Sallow   
3                      Gellert Grindelwald's acolytes   
4                      Gellert Grindelwald's acolytes   
5                                              Carrow   
6                                     Gunnar Grimmson   
7                                        Cassius Bell   
8                                      Lord Voldemort   
9                                      Lord Voldemort   
10                                     Lord Voldemort   
11                                     Lord Voldemort   
12                                     Lord Voldemort   
13                                     Lord Voldemort   
14                                     Lord Voldemort   
15                                     Lord Voldemort   
16                             

# Spell Stats Data Cleaning

In [4]:
spell_stats_file_path = 'spell_stats_raw.csv' 
spell_stats_df = pd.read_csv(spell_stats_file_path, sep=None, engine='python', encoding='utf-8-sig')

print(spell_stats_df)

                          Name    Damage  Cooldown  Duration  \
0                        Accio         0      8.00       0.5   
1                    Alohomora         0      0.50       0.0   
2                Ancient Magic       250      0.00       0.0   
3          Ancient Magic Throw        40      0.00       0.0   
4             Arresto Momentum         0     15.00      15.0   
5                Avada Kedavra  10000000     90.00       0.0   
6                   Basic Cast         3      0.00       0.0   
7   Basic Cast Heavy (4th hit)        23      0.00       0.0   
8                     Bombarda        35     15.00       0.0   
9                    Confringo        25     10.00       5.0   
10                      Crucio        88     20.00      20.0   
11                     Depulso        40     10.00       0.0   
12                    Descendo        11     10.00       2.0   
13                    Diffindo        45     15.00       0.0   
14             Disillusionment         0

In [None]:
spell_stats_file_path = 'spell_stats_raw.csv' 
spell_stats_df = pd.read_csv(spell_stats_file_path, sep=None, engine='python', encoding='utf-8-sig')

# Spells to keep
spell_names = [
    "Avada Kedavra",
    "Confringo",
    "Incendio",
    "Crucio",
    "Imperio",
    "Expelliarmus",
    "Petrificus Totalus"
]

# Filter out spells not used
spell_stats_df = spell_stats_df[spell_stats_df["Name"].isin(spell_names)]

# Fix NaN values to 0
spell_stats_df.fillna(0, inplace=True)

# Filter out columns not used
spell_stats_df = spell_stats_df[['Name', 'Damage', "Range", "Cooldown", "Duration", "CD OOC"]]

print(spell_stats_df)

log_transform_columns = ['Damage', 'Range', 'CD OOC']
log_df = spell_stats_df
for column in log_transform_columns:
    log_df[column] = np.log1p(log_df[column])

# print(log_df)

scaler = MinMaxScaler(feature_range=(1, 10))

# Select numeric columns for normalization
columns_to_normalize = ['Damage', 'Range', 'Cooldown', 'Duration', 'CD OOC']

# Normalize the data
df_min_max_scaled = log_df.copy() 
  
# apply normalization techniques 
for column in df_min_max_scaled.columns: 
    if column in columns_to_normalize:
        df_min_max_scaled[column] = (df_min_max_scaled[column] - df_min_max_scaled[column].min()) / (df_min_max_scaled[column].max() - df_min_max_scaled[column].min())    
  
df_min_max_scaled.replace(0, .05, inplace=True)
# view normalized data 
print(df_min_max_scaled)

spell_stats_df.to_csv("spell_stats_data.csv", sep=',', encoding='utf-8')




                  Name    Damage   Range  Cooldown  Duration  CD OOC
5        Avada Kedavra  10000000  3000.0     90.00       0.0     1.0
9            Confringo        25  3000.0     10.00       5.0    30.0
10              Crucio        88  3000.0     20.00      20.0    60.0
15        Expelliarmus        20  3000.0     10.00       0.0    30.0
18             Imperio         0  3000.0     30.00      20.0     1.0
19            Incendio        50   500.0      8.00       5.0    24.0
22  Petrificus Totalus       250   550.0      1.25       0.0     1.0
                  Name    Damage     Range  Cooldown  Duration    CD OOC
5        Avada Kedavra  1.000000  1.000000  1.000000      0.05  0.050000
9            Confringo  0.202139  1.000000  0.098592      0.25  0.801948
10              Crucio  0.278484  1.000000  0.211268      1.00  1.000000
15        Expelliarmus  0.188888  1.000000  0.098592      0.05  0.801948
18             Imperio  0.050000  1.000000  0.323944      1.00  0.050000
19        

In [41]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Add descriptions to spells
urls = [
    {
        "spell": "Avada Kedavra",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#A",
        "id": "Avada_Kedavra_(Killing_Curse)"
    }, 
    {
        "spell": "Confringo",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#C",
        "id": "Confringo_(Blasting_Curse)"
    },
    {
        "spell": "Crucio",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#C",
        "id": "Crucio_(Cruciatus_Curse)"
    },
    {
        "spell": "Expelliarmus",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#E",
        "id": "Expelliarmus_(Disarming_Charm)"
    },
    {
        "spell": "Incendio",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#I",
        "id": "Incendio_(Fire-Making_Spell)"
    },
    {
        "spell": "Petrificus Totalus",
        "url": "https://harrypotter.fandom.com/wiki/List_of_spells#P",
        "id": "Petrificus_Totalus_(Full_Body-Bind_Curse)"
    }
]

for spell in urls:
    response = requests.get(spell["url"])
    soup = BeautifulSoup(response.content, "html.parser")
    spell_section = soup.find(id=spell["id"])
    dl_tag = spell_section.find_next('dl')
    dd_tags = dl_tag.find_all('dd')
    description = dd_tags[2].get_text(strip=True)[12:]
    bracket_index = description.find('[')
    if bracket_index != -1:
        description = description[:bracket_index].strip()
    df_min_max_scaled.loc[df_min_max_scaled["Name"] == spell["spell"], "Description"] = description
   
df_min_max_scaled.loc[df_min_max_scaled["Name"] == "Imperio", "Description"] = 'Places the victim completely under the caster\'s control. The victim is put into a calm, trance-like state, and becomes unquestionably obedient to the commands of the caster. However, those who are strong-willed may learn to resist it. One of the three "Unforgivable Curses," the use of this curse on another human results in capital punishment or life sentence in Azkaban.'
# print(df_min_max_scaled)

df_min_max_scaled.to_csv("norm_spell_stats.csv", sep=',', encoding='utf-8', index=False)



In [None]:
response = requests.get("https://harrypotter.fandom.com/wiki/List_of_spells#I")
soup = BeautifulSoup(response.content, "html.parser")
spell_section = soup.find_all('span')
# spell_section = soup.find("Imperio_(Imperius_Curse)")
print(spell_section)
dl_tag = spell_section.find_next('dl')
dd_tags = dl_tag.find_all('dd')
print(dd_tags[2].get_text(strip=True)[12:])
# {
#     "spell": "Imperio",
#     "url": "https://harrypotter.fandom.com/wiki/List_of_spells#I",
#     "id": "Imperio_(Imperius_Curse)"
# },

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# URL of the webpage to scrape
url = "https://harrypotter.fandom.com/wiki/Stunning_Spell"

# Send a GET request to the page
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

# Define the list of casters you are interested in
casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

# two tables in wiki
tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    # First table is successful and second table is unsuccessful
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            # Remove text in parentheses
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("stunning_spell_data_with_success.csv", index=False)


                 Caster                                             Victim  \
0       Peter Pettigrew                                        Crookshanks   
1       Peter Pettigrew                            Bartemius Crouch Junior   
2          Harry Potter                                        Acromantula   
3          Harry Potter                                        Viktor Krum   
4          Harry Potter                                         Crabbe Snr   
5          Harry Potter                                     Walden Macnair   
6          Harry Potter                                       Death Eaters   
7          Harry Potter                                     Thorfinn Rowle   
8          Harry Potter                                     Mugglewaitress   
9      Dolores Umbridge                                      Corban Yaxley   
10     Dolores Umbridge                                      Lucius Malfoy   
11     Dolores Umbridge                                    Fenri

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Killing_Curse"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("killing_curse_data_with_success.csv", index=False)


                 Caster                                 Victim     Success
0        Lord Voldemort                          Thomas Riddle  Successful
1        Lord Voldemort                            Mary Riddle  Successful
2        Lord Voldemort                           James Potter  Successful
3        Lord Voldemort                            Lily Potter  Successful
4        Lord Voldemort                           Harry Potter  Successful
5        Lord Voldemort               31 October1981;2 May1998  Successful
6        Lord Voldemort                         Bertha Jorkins  Successful
7        Lord Voldemort                            Frank Bryce  Successful
8        Lord Voldemort                                 Fawkes  Successful
9        Lord Voldemort                        Charity Burbage  Successful
10       Lord Voldemort                          Alastor Moody  Successful
11       Lord Voldemort  Unnamed German speaking Muggle family  Successful
12       Lord Voldemort  

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Cruciatus_Curse"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("cruciatus_curse_data_with_success.csv", index=False)


                 Caster                                    Victim  \
0   Bellatrix Lestrange                         Patricia Rakepick   
1   Bellatrix Lestrange                              Merula Snyde   
2   Bellatrix Lestrange   Unidentified Metamorphmagus Dark wizard   
3   Bellatrix Lestrange                                    Borgin   
4          Harry Potter                                  Avery II   
5          Harry Potter                           Peter Pettigrew   
6          Harry Potter                        Garrick Ollivander   
7          Harry Potter                      Bartemius Crouch Jnr   
8          Harry Potter                               Viktor Krum   
9      Hermione Granger                              Death Eaters   
10     Hermione Granger                                 Ted Tonks   
11     Hermione Granger                           Andromeda Tonks   
12         Draco Malfoy                           Antonin Dolohov   
13         Draco Malfoy           

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Sectumsempra"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    for row in table.find_all("tr"):
        cells = row.find_all("td")

        if len(cells) < 2:
            continue

        caster = cells[0].get_text(strip=True)
        victim = cells[1].get_text(strip=True)

        victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
        
        if caster in casters_of_interest:
            data.append({
                "Caster": caster,
                "Victim": victim,
                "Success": success_status
            })

df = pd.DataFrame(data)

print(df)

df.to_csv("sectumsempra_data_with_success.csv", index=False)


          Caster         Victim       Success
0  Severus Snape   James Potter    Successful
1   Harry Potter   Draco Malfoy    Successful
2   Harry Potter  Severus Snape  Unsuccessful


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Imperius_Curse"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("imperius_curse_data_with_success.csv", index=False)


                Caster                                             Victim  \
0   Neville Longbottom                                        Dean Thomas   
1   Neville Longbottom                                     Lavender Brown   
2   Neville Longbottom                                        Viktor Krum   
3       Lord Voldemort                                      Lucius Malfoy   
4       Lord Voldemort                                     Broderick Bode   
5       Lord Voldemort                              An unknownDeath Eater   
6         Draco Malfoy                                     Madam Rosmerta   
7         Draco Malfoy                      Unknown, likely a Death Eater   
8         Draco Malfoy                                      Corban Yaxley   
9         Draco Malfoy                              An unknownDeath Eater   
10        Harry Potter  To replace the curse that theThief's Downfallw...   
11        Harry Potter                                            Travers   

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Confundus_Charm"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("confundus_charm_data_with_success.csv", index=False)


        Caster    Victim     Success
0  Ron Weasley  Delphini  Successful


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://harrypotter.fandom.com/wiki/Disarming_Charm"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("disarming_spell_data_with_success.csv", index=False)


                Caster                                             Victim  \
0     Albus Dumbledore                                    Jacob's sibling   
1     Albus Dumbledore                              1989–1990 school year   
2     Albus Dumbledore                              1990–1991 school year   
3     Albus Dumbledore  Used to disarmVerucca Buckthorn-Snydeof her wa...   
4     Albus Dumbledore                            Verucca Buckthorn-Snyde   
5     Albus Dumbledore                                      Corrine Cribb   
6         Draco Malfoy                                     September,2020   
7         Draco Malfoy                                       Fred Weasley   
8         Draco Malfoy                                     George Weasley   
9         Draco Malfoy                                      Ginny Weasley   
10        Draco Malfoy                                       Sirius Black   
11        Draco Malfoy                                        Remus Lupin   

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

##obliviate

url = "https://harrypotter.fandom.com/wiki/Memory_Charm"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("memory_spell_data_with_success.csv", index=False)

              Caster                                             Victim  \
0    Peter Pettigrew                                      Alastor Moody   
1    Peter Pettigrew                                  Gilderoy Lockhart   
2    Peter Pettigrew  Jacob's sibling,Penny Haywood,Merula Snyde,And...   
3    Peter Pettigrew                                            Himself   
4    Peter Pettigrew                            Bartemius Crouch Senior   
5    Peter Pettigrew                                MinistryObliviators   
6    Peter Pettigrew                                        Mrs Roberts   
7    Peter Pettigrew                                  Roberts'schildren   
8    Peter Pettigrew                       President of unknown country   
9    Peter Pettigrew                               Kingsley Shacklebolt   
10  Hermione Granger                                     Thorfinn Rowle   
11  Hermione Granger                                     Mugglewaitress   
12  Hermione Granger     

In [None]:
##blasting or confringo

url = "https://harrypotter.fandom.com/wiki/Blasting_Curse"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("blasting_spell_data_with_success.csv", index=False)

            Caster       Victim       Success
0  Peter Pettigrew  Death Eater  Unsuccessful
1  Peter Pettigrew      Unknown  Unsuccessful


In [None]:
url = "https://harrypotter.fandom.com/wiki/Fire-Making_Spell"

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

casters_of_interest = [
    "Harry Potter", "Ron Weasley", "Hermione Granger", "Neville Longbottom",
    "Albus Dumbledore", "Severus Snape", "Draco Malfoy", "Lord Voldemort",
    "Bellatrix Lestrange", "Peter Pettigrew", "Dolores Umbridge"
]

data = []

tables = soup.find_all("table", {"class": "wikitable"})

for i, table in enumerate(tables):
    success_status = "Unsuccessful" if i == 1 else "Successful"
    
    current_caster = None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        
        if not cells:
            continue

        cell_text = cells[0].get_text(strip=True)
        
        if cell_text in casters_of_interest:
            current_caster = cell_text
        elif current_caster:
            victim = cell_text
            
            victim = re.sub(r'\(.*?\)|\[.*?\]', '', victim).strip()
            
            if victim and "employees" not in victim and "colleagues" not in victim:
                data.append({
                    "Caster": current_caster,
                    "Victim": victim,
                    "Success": success_status
                })

df = pd.DataFrame(data)

print(df)

df.to_csv("firemaking_spell_data_with_success.csv", index=False)

              Caster                                             Victim  \
0   Albus Dumbledore                                               1938   
1   Albus Dumbledore                                             Carrow   
2   Albus Dumbledore                                 Minerva McGonagall   
3      Severus Snape                                    Jacob's sibling   
4      Severus Snape                                       Bill Weasley   
5      Severus Snape                           Jacob's sibling's friend   
6      Severus Snape                                    Charlie Weasley   
7      Severus Snape                                    Jacob's sibling   
8      Severus Snape  Used this spell in theGreat Hallon a stack ofb...   
9      Severus Snape                                      Rubeus Hagrid   
10     Severus Snape                                 Minerva McGonagall   
11     Severus Snape                                                Tom   
12     Severus Snape     

## megafile creation

In [None]:
##creating mega data frame with the following columns: caster, victim, spell, success, spell_name, caster_house, victim_house, patronus

csv_files = {
    "cruciatus_curse_data_with_success_fixed.csv": "Crucio",
    "confundus_charm_data_with_success_fixed.csv": "Confundo",
    "blasting_spell_data_with_success_fixed.csv": "Confringo",
    "disarming_spell_data_with_success_fixed.csv": "Expelliarmus",
    "firemaking_spell_data_with_success_fixed.csv": "Incendio",
    "imperius_curse_data_with_success_fixed.csv": "Imperio",
    "killing_curse_data_with_success_fixed.csv": "Avada Kedavra",
    "memory_charm_data_with_success_fixed.csv": "Obliviate",
    "sectumsempra_data_with_success_fixed.csv": "Sectumsempra",
    "stunning_spell_data_with_success_fixed.csv": "Stupefy",
    "fullbody_cursedata_with_success_fixed.csv": "Petrificus Totalus",
    "morsmordre_casters_specific.csv": "Morsmordre",
    "patronus_casters_of_interest_data.csv": "Expecto Patronum",
}

##create mega-spell file
dataframes = []
for filename, incantation in csv_files.items():
    df = pd.read_csv(filename)
    
    df['spell_name'] = incantation
    
    dataframes.append(df)

combined_df = pd.concat(dataframes, ignore_index=True)

print(combined_df.head())



                Caster              Victim     Success spell_name
0  Bellatrix Lestrange    Frank Longbottom  Successful     Crucio
1  Bellatrix Lestrange    Alice Longbottom  Successful     Crucio
2  Bellatrix Lestrange  Neville Longbottom  Successful     Crucio
3  Bellatrix Lestrange    Hermoine Granger  Successful     Crucio
4       Lord Voldemort      Bertha Jorkins  Successful     Crucio


In [None]:
##connect to character data (caster house, victim house, patronus)
characters_df = pd.read_csv('characters.csv')
characters_df = characters_df[['Character_Name', 'House']]

caster_df = characters_df.rename(columns={'Character_Name': 'Caster', 'House': 'caster_house'})
combinedchar_df = combined_df.merge(caster_df, on='Caster', how='left')
combinedchar_df = combinedchar_df.merge(caster_df, on='Caster', how='left')

victim_df = characters_df.rename(columns={'Character_Name': 'Victim', 'House': 'victim_house'})
combinedchar_df = combinedchar_df.merge(victim_df, on='Victim', how='left')

print(combinedchar_df.head())



                Caster              Victim     Success spell_name  \
0  Bellatrix Lestrange    Frank Longbottom  Successful     Crucio   
1  Bellatrix Lestrange    Alice Longbottom  Successful     Crucio   
2  Bellatrix Lestrange  Neville Longbottom  Successful     Crucio   
3  Bellatrix Lestrange    Hermoine Granger  Successful     Crucio   
4       Lord Voldemort      Bertha Jorkins  Successful     Crucio   

  caster_house_x caster_house_y victim_house  
0      Slytherin      Slytherin          NaN  
1      Slytherin      Slytherin          NaN  
2      Slytherin      Slytherin   Gryffindor  
3      Slytherin      Slytherin          NaN  
4            NaN            NaN          NaN  


In [None]:
##combinedchar_df.drop("caster_house_y", axis=1, inplace=True)

combinedchar_df.rename(columns={'caster_house_x': 'caster_house'}, inplace=True)

print(combinedchar_df)


NameError: name 'combinedchar_df' is not defined

In [None]:
combinedchar_df.drop("caster_house_y", axis=1, inplace=True)
# Update caster_house for 'Lord Voldemort'
combinedchar_df.loc[combinedchar_df['Caster'] == 'Lord Voldemort', 'caster_house'] = 'Slytherin'

# Update victim_house for 'Lord Voldemort'
combinedchar_df.loc[combinedchar_df['Victim'] == 'Lord Voldemort', 'victim_house'] = 'Slytherin'


NameError: name 'combinedchar_df' is not defined

In [None]:
combinedchar_df.rename(columns={'Caster': 'caster', 'Victim': 'victim', 'Success': 'success'}, inplace=True)


NameError: name 'combinedchar_df' is not defined

In [None]:
combinedchar_df.to_csv("megafile.csv", sep=',', encoding='utf-8')


NameError: name 'combinedchar_df' is not defined

In [None]:
character_nodes = []
megafile_df = pd.read_csv('megafile.csv')
print(megafile_df.head())
characters_df = pd.read_csv('characters.csv')
characters_df.head()

   Unnamed: 0               caster              victim     success spell_name  \
0           0  Bellatrix Lestrange    Frank Longbottom  Successful     Crucio   
1           1  Bellatrix Lestrange    Alice Longbottom  Successful     Crucio   
2           2  Bellatrix Lestrange  Neville Longbottom  Successful     Crucio   
3           3  Bellatrix Lestrange    Hermione Granger  Successful     Crucio   
4           4       Lord Voldemort      Bertha Jorkins  Successful     Crucio   

  caster_house victim_house  
0    Slytherin          NaN  
1    Slytherin          NaN  
2    Slytherin   Gryffindor  
3    Slytherin   Gryffindor  
4    Slytherin          NaN  


Unnamed: 0,Character_ID,Character_Name,Species,Gender,House,Patronus,Wand (Wood),Wand (Core)
0,1,Harry Potter,Human,Male,Gryffindor,Stag,Holly,Phoenix Feather
1,2,Ron Weasley,Human,Male,Gryffindor,Jack Russell Terrier,,
2,3,Hermione Granger,Human,Female,Gryffindor,Otter,Vine,Dragon Heartstring
3,4,Albus Dumbledore,Human,Male,Gryffindor,Phoenix,Elder,Thestral Tail Hair
4,5,Rubeus Hagrid,Half-Human/Half-Giant,Male,Gryffindor,,Oak,


In [8]:
for x in megafile_df['caster']:
    if x not in character_nodes:
        character_nodes.append(x)
for x in megafile_df['victim']:
    if x not in character_nodes:
        character_nodes.append(x)
#print(character_nodes)
nodes_df = pd.DataFrame(character_nodes, columns=["Character_Name"])
nodes_df.to_csv("finalized_nodes.csv", sep=',', encoding='utf-8')

NameError: name 'megafile_df' is not defined