**SILVER LAYER**


**IMPORTS**


In [1]:
import pandas as pd
import numpy as np
import string
import datetime

In [2]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

**LOAD THE BRONZE LAYER**


In [3]:
df = pd.read_parquet('Bronze.parquet')

**Generalized Data Validation and Cleaning Functions**


Drop colums with more than threshold of null values


In [4]:
def drop_null_columns(df, threshold = 0.2):
    null_percentage = df.isnull().mean()
    columns_to_keep = null_percentage[null_percentage < threshold].index
    if len(columns_to_keep) == len(df.columns):
        print('No columns removed')
    else:
        print(f'removing: {[c for c in df.columns if c not in columns_to_keep]}')
    return df[columns_to_keep]

Fill missing values with assigned defaults in columns_defaults dictionary


In [5]:
def fill_missing_values(df, columns_defaults: dict):
    for column, default_value in columns_defaults.items():
        df[column] = df[column].fillna(default_value)
    return df

Convert columns to desired data types


In [6]:
def convert_column_types(df,columns_types: dict):
    try:
        for column, dtype in columns_types.items():
            df[column] = df[column].astype(dtype)
        return df
    except Exception as e:
        print(f'{column} caused an issue')
        raise e


Remove punctation from string type columns


In [7]:
def remove_punctuation(df, columns: list):
    for c in columns:
        df.loc[:,c] = df[c].str.replace(r'[^\w\s]|_','',regex = True)
    return df


Check the schema of the table to validate formats


In [8]:
def check_formats(df, expected_formats:dict):
    incorrect_formats = []
    for column, datatype in df.dtypes.to_dict().items():
        expected_type = expected_formats.get(column)
        if expected_type != datatype:
            incorrect_formats.append((column, datatype, expected_type))
    incorrect_columns = [c[0] for c in incorrect_formats]
    correct_format_count = len([c for c in df.columns if c not in incorrect_columns])
    if incorrect_formats:
        print('Below are incorrect formats')
        print('-' * 50)
        print(f'Correct Column : {correct_format_count}')
        return pd.DataFrame(incorrect_formats, columns = ['Column', 'Actual', 'Expected'])
    else:
        print('Validation Complete, no discrepancies')

Check the similarity between two words using Jaccard Similarity


In [9]:
def check_similarity(word1: str, word2:str) -> float:
    # Convert string to a set of character
    word_set1 = set(word1)
    word_set2 = set(word2)

    # Get the count of how many characters are in both sets
    intersection = word_set1.intersection(word_set2)
    intersection_count = len(intersection)

    # Get the count of total unique characters of both sets
    total_char_count = len(word_set1.union(word_set2))

    # Return the precentage of the intersection against total unique characters
    similarity = intersection_count / total_char_count
    return similarity

Check Mispelling


In [10]:
def check_mispelling(dataframe: pd.DataFrame, column: str, similarity_threshold: float) -> pd.DataFrame:
    all_unique_values = list(set(dataframe[column].tolist()))
    similarity_list = []
    for n in range(len(all_unique_values)):
        value1 = all_unique_values[n]
        for n2 in range (n + 1, len(all_unique_values)):
            value2 = all_unique_values[n2]
            similarity = round(check_similarity(value1, value2), 4)
            if similarity >= similarity_threshold:
                similarity_list.append([value1, value2, similarity])
    return pd.DataFrame(similarity_list, columns = ['name1', 'name2', 'similarity'])

In [11]:
df.head()

Unnamed: 0,Student Name,House,Year,Wand Type,Pet,Potions Grade,Defense Against the Dark Arts Grade,Transfiguration Grade,Spells Learned,Quidditch Position,Points Earned for House,Detentions,Participation in Dueling Club,Triwizard Tournament Involvement,Items Owned,Knuts Spent in Hogsmeade,Attendance at Classes,Magical Accidents,fileName,loadDatetimeStamp
0,Seamus Potter,Slytherin,2,"9 inches, elm, veela hair core",Dragon,Outstanding,Exceeds Expectations,Outstanding,26,Beater,-100,3,No,No,"Time-Turner, Marauder's Map",88,74,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
1,Ginny Spinnet,Gryffindor,6,"9 inches, elm, veela hair core",Cat,Exceeds Expectations,Acceptable,Outstanding,11,Chaser,139,2,No,No,"Firebolt, Sneakoscope, Extendable Ears",99,67,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
2,Padma Parkinson,Gryffindor,4,"11 inches, holly, phoenix feather core",Muggle Born,Outstanding,Acceptable,Outstanding,19,Seeker,93,2,No,No,Invisibility Cloak,342,88,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
3,,Ravenclaw,1,"12 inches, yew, unicorn hair core",Toad,Exceeds Expectations,Exceeds Expectations,Exceeds Expectations,16,Seeker,-73,2,No,,Time-Turner,233,45,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
4,,Ravenclaw,1,"10 inches, oak, dragon heartstring core",Owl,,Outstanding,Exceeds Expectations,9,Keeper,90,0,No,Yes,"Marauder's Map, Sneakoscope",473,27,7,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582


In [12]:
df.isnull().sum()

Student Name                           13440
House                                   4000
Year                                       0
Wand Type                                  0
Pet                                     7946
Potions Grade                           3440
Defense Against the Dark Arts Grade        0
Transfiguration Grade                      0
Spells Learned                             0
Quidditch Position                      9504
Points Earned for House                    0
Detentions                                 0
Participation in Dueling Club              0
Triwizard Tournament Involvement        5920
Items Owned                                0
Knuts Spent in Hogsmeade                   0
Attendance at Classes                      0
Magical Accidents                          0
fileName                                   0
loadDatetimeStamp                          0
dtype: int64

In [13]:
df.isnull().mean() * 100

Student Name                           28.000000
House                                   8.333333
Year                                    0.000000
Wand Type                               0.000000
Pet                                    16.554167
Potions Grade                           7.166667
Defense Against the Dark Arts Grade     0.000000
Transfiguration Grade                   0.000000
Spells Learned                          0.000000
Quidditch Position                     19.800000
Points Earned for House                 0.000000
Detentions                              0.000000
Participation in Dueling Club           0.000000
Triwizard Tournament Involvement       12.333333
Items Owned                             0.000000
Knuts Spent in Hogsmeade                0.000000
Attendance at Classes                   0.000000
Magical Accidents                       0.000000
fileName                                0.000000
loadDatetimeStamp                       0.000000
dtype: float64

In [14]:
drop_null_columns(df, threshold = .3)



No columns removed


Unnamed: 0,Student Name,House,Year,Wand Type,Pet,Potions Grade,Defense Against the Dark Arts Grade,Transfiguration Grade,Spells Learned,Quidditch Position,Points Earned for House,Detentions,Participation in Dueling Club,Triwizard Tournament Involvement,Items Owned,Knuts Spent in Hogsmeade,Attendance at Classes,Magical Accidents,fileName,loadDatetimeStamp
0,Seamus Potter,Slytherin,2,"9 inches, elm, veela hair core",Dragon,Outstanding,Exceeds Expectations,Outstanding,26,Beater,-100,3,No,No,"Time-Turner, Marauder's Map",88,74,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
1,Ginny Spinnet,Gryffindor,6,"9 inches, elm, veela hair core",Cat,Exceeds Expectations,Acceptable,Outstanding,11,Chaser,139,2,No,No,"Firebolt, Sneakoscope, Extendable Ears",99,67,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
2,Padma Parkinson,Gryffindor,4,"11 inches, holly, phoenix feather core",Muggle Born,Outstanding,Acceptable,Outstanding,19,Seeker,93,2,No,No,Invisibility Cloak,342,88,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
3,,Ravenclaw,1,"12 inches, yew, unicorn hair core",Toad,Exceeds Expectations,Exceeds Expectations,Exceeds Expectations,16,Seeker,-73,2,No,,Time-Turner,233,45,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
4,,Ravenclaw,1,"10 inches, oak, dragon heartstring core",Owl,,Outstanding,Exceeds Expectations,9,Keeper,90,0,No,Yes,"Marauder's Map, Sneakoscope",473,27,7,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,Susan Potter,Slytherin,1,"11 inches, holly, phoenix feather core",,Outstanding,Outstanding,Exceeds Expectations,24,Seeker,90,5,Yes,Yes,"Marauder's Map, Firebolt",399,40,1,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47996,Alicyia Boot,,4,"11 inches, holly, phoenix feather core",Toad,,Poor,Poor,28,Seeker,-9,1,Yes,Yes,"Extendable Ears, Marauder's Map, Invisibility ...",299,27,5,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47997,Terry Granger,Ravenclaw,7,"9 inches, elm, veela hair core",Owl,Outstanding,Poor,Exceeds Expectations,12,Seeker,-47,4,Yes,No,"Sneakoscope, Firebolt",124,27,10,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47998,Ron Bell,Hufflepuff,1,"9 inches, elm, veela hair core",Dragon,Exceeds Expectations,Acceptable,Acceptable,11,Chaser,75,5,Yes,No,"Firebolt, Sneakoscope",362,49,7,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478


In [15]:
fill_missing_values(df,{'Student Name': 'He who must not be named', 'House': 'Unknown', 'Year': 0, 'Wand Type': 'Unknown', 'Pet': 'Unknown', 'Potions Grade': 'No Grade', 'Defense Against the Dark Arts Grade': 'No Grade', 'Transfiguration Grade': 'No Grade', 'Spells Learned': 0, 'Quidditch Position': 'Not Playing', 'Points Earned for House': 0, 'Detentions': 0, 'Participation in Dueling Club': 'Unknown', 'Triwizard Tournament Involvement': 'Unknown', 'Items Owned': 'No Items', 'Knuts Spent in Hogsmeade': 0, 'Attendance at Classes': 0, 'Magical Accidents': 0})


Unnamed: 0,Student Name,House,Year,Wand Type,Pet,Potions Grade,Defense Against the Dark Arts Grade,Transfiguration Grade,Spells Learned,Quidditch Position,Points Earned for House,Detentions,Participation in Dueling Club,Triwizard Tournament Involvement,Items Owned,Knuts Spent in Hogsmeade,Attendance at Classes,Magical Accidents,fileName,loadDatetimeStamp
0,Seamus Potter,Slytherin,2,"9 inches, elm, veela hair core",Dragon,Outstanding,Exceeds Expectations,Outstanding,26,Beater,-100,3,No,No,"Time-Turner, Marauder's Map",88,74,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
1,Ginny Spinnet,Gryffindor,6,"9 inches, elm, veela hair core",Cat,Exceeds Expectations,Acceptable,Outstanding,11,Chaser,139,2,No,No,"Firebolt, Sneakoscope, Extendable Ears",99,67,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
2,Padma Parkinson,Gryffindor,4,"11 inches, holly, phoenix feather core",Muggle Born,Outstanding,Acceptable,Outstanding,19,Seeker,93,2,No,No,Invisibility Cloak,342,88,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
3,He who must not be named,Ravenclaw,1,"12 inches, yew, unicorn hair core",Toad,Exceeds Expectations,Exceeds Expectations,Exceeds Expectations,16,Seeker,-73,2,No,Unknown,Time-Turner,233,45,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
4,He who must not be named,Ravenclaw,1,"10 inches, oak, dragon heartstring core",Owl,No Grade,Outstanding,Exceeds Expectations,9,Keeper,90,0,No,Yes,"Marauder's Map, Sneakoscope",473,27,7,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,Susan Potter,Slytherin,1,"11 inches, holly, phoenix feather core",Unknown,Outstanding,Outstanding,Exceeds Expectations,24,Seeker,90,5,Yes,Yes,"Marauder's Map, Firebolt",399,40,1,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47996,Alicyia Boot,Unknown,4,"11 inches, holly, phoenix feather core",Toad,No Grade,Poor,Poor,28,Seeker,-9,1,Yes,Yes,"Extendable Ears, Marauder's Map, Invisibility ...",299,27,5,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47997,Terry Granger,Ravenclaw,7,"9 inches, elm, veela hair core",Owl,Outstanding,Poor,Exceeds Expectations,12,Seeker,-47,4,Yes,No,"Sneakoscope, Firebolt",124,27,10,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47998,Ron Bell,Hufflepuff,1,"9 inches, elm, veela hair core",Dragon,Exceeds Expectations,Acceptable,Acceptable,11,Chaser,75,5,Yes,No,"Firebolt, Sneakoscope",362,49,7,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478


In [16]:
df.isnull().mean() * 100

Student Name                           0.0
House                                  0.0
Year                                   0.0
Wand Type                              0.0
Pet                                    0.0
Potions Grade                          0.0
Defense Against the Dark Arts Grade    0.0
Transfiguration Grade                  0.0
Spells Learned                         0.0
Quidditch Position                     0.0
Points Earned for House                0.0
Detentions                             0.0
Participation in Dueling Club          0.0
Triwizard Tournament Involvement       0.0
Items Owned                            0.0
Knuts Spent in Hogsmeade               0.0
Attendance at Classes                  0.0
Magical Accidents                      0.0
fileName                               0.0
loadDatetimeStamp                      0.0
dtype: float64

In [17]:
remove_punctuation(df, ['Quidditch Position'])

Unnamed: 0,Student Name,House,Year,Wand Type,Pet,Potions Grade,Defense Against the Dark Arts Grade,Transfiguration Grade,Spells Learned,Quidditch Position,Points Earned for House,Detentions,Participation in Dueling Club,Triwizard Tournament Involvement,Items Owned,Knuts Spent in Hogsmeade,Attendance at Classes,Magical Accidents,fileName,loadDatetimeStamp
0,Seamus Potter,Slytherin,2,"9 inches, elm, veela hair core",Dragon,Outstanding,Exceeds Expectations,Outstanding,26,Beater,-100,3,No,No,"Time-Turner, Marauder's Map",88,74,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
1,Ginny Spinnet,Gryffindor,6,"9 inches, elm, veela hair core",Cat,Exceeds Expectations,Acceptable,Outstanding,11,Chaser,139,2,No,No,"Firebolt, Sneakoscope, Extendable Ears",99,67,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
2,Padma Parkinson,Gryffindor,4,"11 inches, holly, phoenix feather core",Muggle Born,Outstanding,Acceptable,Outstanding,19,Seeker,93,2,No,No,Invisibility Cloak,342,88,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
3,He who must not be named,Ravenclaw,1,"12 inches, yew, unicorn hair core",Toad,Exceeds Expectations,Exceeds Expectations,Exceeds Expectations,16,Seeker,-73,2,No,Unknown,Time-Turner,233,45,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
4,He who must not be named,Ravenclaw,1,"10 inches, oak, dragon heartstring core",Owl,No Grade,Outstanding,Exceeds Expectations,9,Keeper,90,0,No,Yes,"Marauder's Map, Sneakoscope",473,27,7,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,Susan Potter,Slytherin,1,"11 inches, holly, phoenix feather core",Unknown,Outstanding,Outstanding,Exceeds Expectations,24,Seeker,90,5,Yes,Yes,"Marauder's Map, Firebolt",399,40,1,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47996,Alicyia Boot,Unknown,4,"11 inches, holly, phoenix feather core",Toad,No Grade,Poor,Poor,28,Seeker,-9,1,Yes,Yes,"Extendable Ears, Marauder's Map, Invisibility ...",299,27,5,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47997,Terry Granger,Ravenclaw,7,"9 inches, elm, veela hair core",Owl,Outstanding,Poor,Exceeds Expectations,12,Seeker,-47,4,Yes,No,"Sneakoscope, Firebolt",124,27,10,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47998,Ron Bell,Hufflepuff,1,"9 inches, elm, veela hair core",Dragon,Exceeds Expectations,Acceptable,Acceptable,11,Chaser,75,5,Yes,No,"Firebolt, Sneakoscope",362,49,7,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478


In [18]:
check_mispelling(df, 'Student Name', .8)

Unnamed: 0,name1,name2,similarity
0,Cho GrangJer,George Chang,0.9091
1,Cho GrangJer,GeorWge Chang,0.8333
2,Cho GrangJer,GeForge Chang,0.8333
3,Cho GrangJer,Cho GrangTer,0.8333
4,Cho GrangJer,Cho jGranger,0.8333
...,...,...,...
27234,Skeamus Finnigan,Seamgus Finnigan,0.9167
27235,Terry Broown,Terry OBrown,0.9000
27236,Seamus MalfoLy,Seamus Malmfoy,0.9231
27237,Clho Bones,Choo Bones,0.8889


In [19]:
df.loc[:,'House'] = df['House'].str.capitalize()

In [20]:
result = check_mispelling(df, 'Student Name', 0.8)
harry_potter_matches = result[(result['name1'].str.lower().str.contains("harry potter")) | 
                              (result['name2'].str.lower().str.contains("harry potter"))]
for name in harry_potter_matches['name1']:
    print(f"'{name}'")

'Haqrry Potter'
'Haqrry Potter'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry PotterO'
'Harry Potter'
'Harry Potter'
'Harry Potter'
'Harry Potter'
'Harry Potter'
'Harry Potter'
'Harry Potter'
'Harry Potter'


In [21]:
df2 = df.copy()
rename_values = {
    'Harry Pojtter': 'Harry Potter',
    'Harry Pojtter': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'Harry PotterO': 'Harry Potter',
    'HarrLy Potter': 'Harry Potter',
    'Harry Pottefr': 'Harry Potter',
    'Terry HPotter': 'Harry Potter',
    'Harry Poktter': 'Harry Potter',
    'Haqrry Potter': 'Harry Potter',
    'Harry PotNter': 'Harry Potter',
    'Harry PoJtter': 'Harry Potter'
}

df2['Student Name'] = df2['Student Name'].replace(rename_values)
unique_names = sorted(df2['Student Name'].unique())
for name in unique_names:
    if name.startswith('Harry'):
        print(name)

Harry ADiggory
Harry Bell
Harry Belll
Harry Bello
Harry Beltl
Harry Bgrown
Harry BoZot
Harry BonJes
Harry Bonces
Harry BoneLs
Harry BoneSs
Harry Bones
Harry Bonest
Harry Boot
Harry BootZ
Harry Brbown
Harry Brhown
Harry BrowHn
Harry Brown
Harry Brownp
Harry Bwell
Harry ChYang
Harry ChanTg
Harry Chandg
Harry Chang
Harry Deiggory
Harry DiggoLry
Harry Diggory
Harry DiggoryQ
Harry DiggoryR
Harry Digrgory
Harry Diuggory
Harry FiGnnigan
Harry FinnRigan
Harry FinniZgan
Harry Finnigan
Harry Finvnigan
Harry Grangder
Harry GrangePr
Harry Granger
Harry Grangerb
Harry Grangerw
Harry JDohnson
Harry Johnsion
Harry JohnsoZn
Harry Johnson
Harry JohnsonV
Harry LChang
Harry LKovegood
Harry LoIvegood
Harry LonLgbottom
Harry LongTbottom
Harry Longbeottom
Harry Longbottom
Harry LongbottomI
Harry Lonzgbottom
Harry LovegooJd
Harry Lovegood
Harry Lovegoogd
Harry Lovegoozd
Harry MWalfoy
Harry MaWlfoy
Harry Malfoy
Harry Malfoyr
Harry OLovegood
Harry PGatil
Harry PWarkinson
Harry ParRkinson
Harry ParkiCnson
Harry

In [22]:
expected_formats = {
"Student Name":"string",
"House":"string",
"Year":"int32",
"Wand Type":"string",
"Pet":"string",
"Potions Grade":"string",
"Defense Against the Dark Arts Grade":"string",
"Transfiguration Grade":"string",
"Spells Learned":"int32",
"Quidditch Position":"string",
"Points Earned for House":"int32",
"Detentions":"int32",
"Participation in Dueling Club":"string",
"Triwizard Tournament Involvement":"string",
"Items Owned":"string",
"Knuts Spent in Hogsmeade":"int32",
"Attendance at Classes":"int32",
"Magical Accidents":"int32",
"fileName":"string",
"loadDatetimeStamp":"datetime64[ns]"
}

In [23]:
check_formats(df, expected_formats = expected_formats)

Below are incorrect formats
--------------------------------------------------
Correct Column : 0


Unnamed: 0,Column,Actual,Expected
0,Student Name,object,string
1,House,object,string
2,Year,int64,int32
3,Wand Type,object,string
4,Pet,object,string
5,Potions Grade,object,string
6,Defense Against the Dark Arts Grade,object,string
7,Transfiguration Grade,object,string
8,Spells Learned,int64,int32
9,Quidditch Position,object,string


In [24]:
convert_column_types(df, columns_types = expected_formats)

Unnamed: 0,Student Name,House,Year,Wand Type,Pet,Potions Grade,Defense Against the Dark Arts Grade,Transfiguration Grade,Spells Learned,Quidditch Position,Points Earned for House,Detentions,Participation in Dueling Club,Triwizard Tournament Involvement,Items Owned,Knuts Spent in Hogsmeade,Attendance at Classes,Magical Accidents,fileName,loadDatetimeStamp
0,Seamus Potter,Slytherin,2,"9 inches, elm, veela hair core",Dragon,Outstanding,Exceeds Expectations,Outstanding,26,Beater,-100,3,No,No,"Time-Turner, Marauder's Map",88,74,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
1,Ginny Spinnet,Gryffindor,6,"9 inches, elm, veela hair core",Cat,Exceeds Expectations,Acceptable,Outstanding,11,Chaser,139,2,No,No,"Firebolt, Sneakoscope, Extendable Ears",99,67,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
2,Padma Parkinson,Gryffindor,4,"11 inches, holly, phoenix feather core",Muggle Born,Outstanding,Acceptable,Outstanding,19,Seeker,93,2,No,No,Invisibility Cloak,342,88,6,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
3,He who must not be named,Ravenclaw,1,"12 inches, yew, unicorn hair core",Toad,Exceeds Expectations,Exceeds Expectations,Exceeds Expectations,16,Seeker,-73,2,No,Unknown,Time-Turner,233,45,5,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
4,He who must not be named,Ravenclaw,1,"10 inches, oak, dragon heartstring core",Owl,No Grade,Outstanding,Exceeds Expectations,9,Keeper,90,0,No,Yes,"Marauder's Map, Sneakoscope",473,27,7,Hogwarts_Student_Data_20240101.csv,2024-12-19 11:50:28.089582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,Susan Potter,Slytherin,1,"11 inches, holly, phoenix feather core",Unknown,Outstanding,Outstanding,Exceeds Expectations,24,Seeker,90,5,Yes,Yes,"Marauder's Map, Firebolt",399,40,1,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47996,Alicyia Boot,Unknown,4,"11 inches, holly, phoenix feather core",Toad,No Grade,Poor,Poor,28,Seeker,-9,1,Yes,Yes,"Extendable Ears, Marauder's Map, Invisibility ...",299,27,5,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47997,Terry Granger,Ravenclaw,7,"9 inches, elm, veela hair core",Owl,Outstanding,Poor,Exceeds Expectations,12,Seeker,-47,4,Yes,No,"Sneakoscope, Firebolt",124,27,10,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478
47998,Ron Bell,Hufflepuff,1,"9 inches, elm, veela hair core",Dragon,Exceeds Expectations,Acceptable,Acceptable,11,Chaser,75,5,Yes,No,"Firebolt, Sneakoscope",362,49,7,Hogwarts_Student_Data_20240601.csv,2024-12-19 11:50:28.344478


In [25]:
check_formats(df, expected_formats = expected_formats)

Validation Complete, no discrepancies


In [26]:
df.to_parquet('Silver.parquet', index = False)