# PAC 2 - Nicolas Desjonqueres

## Exercise 1

(a) True. A global variable can be modified inside a function if it's referenced using "global" inside of the function.

(b) False. When a file is opened using the "with" statement, it automatically closes the file when the block inside "with" ends.

(c) True. The filter function in Python takes a function and an iterable as parameters and returns an iterator that applies the given function to each element of the iterable.

(d) True. Anonymous functions, also known as lambda functions in Python, do not have a name and can be defined in a single expression.

## Exercise 2

In [112]:
import pandas as pd
import os
import shutil

# I intentionally did not make additional modular functions because it was not needed here in my opinion, the...
# ...code is not very long and pretty straight-forward with not a lot of logic operations.

def ex_2(csv_path, output_path, number_of_years_to_go_back = 5, max_number_of_players_to_save = 5, csv_delim = ";"):
    # imports the data with a custom delimiter if needed
    data = pd.read_csv(csv_path, delimiter=csv_delim) 
    
    # the database path
    db_path = output_path + "/draft_db" 
    
    # deletes the dataabse directory if it exists with the output path given
    if os.path.exists(db_path) and os.path.isdir(db_path): 
        shutil.rmtree(db_path)
        
    # creates the first folder of the database
    os.mkdir(db_path) 
    
    # sorts the data by year, descending
    data = data.sort_values(by = "year", ascending = False, ignore_index = True) 
    
    # gets the max year of the database which is the first row of the data
    max_year = data.loc[0, "year"] 
    
    # gets only the rows of the data that are requested (with the parameter number_of_years_to_go_back) 
    data = data[data["year"] >= max_year - number_of_years_to_go_back] 

    # iterates though the data grouped by year
    for y in data.groupby(["year"]): 
        year = y[0][0]

        year_data = y[1]

        # makes the year directory
        os.mkdir(os.path.join(db_path, str(year)))

        for n in year_data.groupby(["nationality"]):
            nationality = n[0][0]

            # sorts the players alphabetically
            player_data = n[1].sort_values(by = "player", ignore_index = True) 

            # makes the directory for the nationality
            os.mkdir(os.path.join(db_path, str(year), nationality)) 
            
            player_count = 0
            
            for index, player in player_data.iterrows():
                # if we surpassed the max number of players that we need, break out of the loop
                if player_count >= max_number_of_players_to_save: 
                    break
                
                # the complete info string for each player
                info_string = f'Team: {player["team"]}; Amateur_team: {player["amateur_team"]}; Age: {str(player["age"])}; Position: {player["position"]};'

                # makes the directory for the player
                os.mkdir(os.path.join(db_path, str(year), nationality, player["player"])) 
                
                # writes the info in the text file that we create when we open a new file with a non-existing path
                with open(os.path.join(db_path, str(year), nationality, player["player"], "info.txt"), "x") as file:
                    file.write(info_string)
                
                player_count += 1
            
            
    
ex2("nhldraft.csv", "", number_of_years_to_go_back = 2, max_number_of_players_to_save = 5, csv_delim = ",")

## Exercise 3

#### 1.

In [104]:
import csv

def ex_3_1(file_path):
    with open(file_path, 'r', newline='') as file:
        reader = csv.DictReader(file)
        
        data = list(map(dict, reader))
        
    return data

data_to_list = ex_3_1("nhldraft.csv")

print(data_to_list[:2])

[{'id': '1', 'year': '2022', 'overall_pick': '1', 'team': 'Montreal Canadiens', 'player': 'Juraj Slafkovsky', 'nationality': 'SK', 'position': 'LW', 'age': '18', 'to_year': '', 'amateur_team': 'TPS (Finland)', 'games_played': '', 'goals': '', 'assists': '', 'points': '', 'plus_minus': '', 'penalties_minutes': '', 'goalie_games_played': '', 'goalie_wins': '', 'goalie_losses': '', 'goalie_ties_overtime': '', 'save_percentage': '', 'goals_against_average': '', 'point_shares': ''}, {'id': '2', 'year': '2022', 'overall_pick': '2', 'team': 'New Jersey Devils', 'player': 'Simon Nemec', 'nationality': 'SK', 'position': 'D', 'age': '18', 'to_year': '', 'amateur_team': 'HK Nitra (Slovakia)', 'games_played': '', 'goals': '', 'assists': '', 'points': '', 'plus_minus': '', 'penalties_minutes': '', 'goalie_games_played': '', 'goalie_wins': '', 'goalie_losses': '', 'goalie_ties_overtime': '', 'save_percentage': '', 'goals_against_average': '', 'point_shares': ''}]


#### 2.

In [14]:
def ex_3_2(data, team_name):
    filtered_players = list(filter(lambda player: player['team'] == team_name, data))
    return filtered_players
            
print(ex_3_2(data_to_list, "Detroit Red Wings")[:2])

[{'id': '8', 'year': '2022', 'overall_pick': '8', 'team': 'Detroit Red Wings', 'player': 'Marco Kasper', 'nationality': 'AT', 'position': 'C', 'age': '18', 'to_year': '', 'amateur_team': 'Rogle BK (Sweden)', 'games_played': '', 'goals': '', 'assists': '', 'points': '', 'plus_minus': '', 'penalties_minutes': '', 'goalie_games_played': '', 'goalie_wins': '', 'goalie_losses': '', 'goalie_ties_overtime': '', 'save_percentage': '', 'goals_against_average': '', 'point_shares': ''}, {'id': '40', 'year': '2022', 'overall_pick': '40', 'team': 'Detroit Red Wings', 'player': 'Dylan James', 'nationality': 'CA', 'position': 'LW', 'age': '18', 'to_year': '', 'amateur_team': 'Sioux City Musketeers (USHL)', 'games_played': '', 'goals': '', 'assists': '', 'points': '', 'plus_minus': '', 'penalties_minutes': '', 'goalie_games_played': '', 'goalie_wins': '', 'goalie_losses': '', 'goalie_ties_overtime': '', 'save_percentage': '', 'goals_against_average': '', 'point_shares': ''}]


#### 3.

In [None]:
def ex_3_3(data):
    players_per_year = {}
    years = list(map(lambda player: player['Year'], data))
    for year in set(years):
        players_per_year[year] = len(list(filter(lambda x: x == year, years)))
    return players_per_year

## Exercise 4

In [152]:
import pandas as pd

def read_book(path):
    with open(path, "r", encoding="utf8") as file:
        file_text = file.read()
    
    chapters_list = file_text.split("\n\n\n\n\n")
    
    chapters_dict = [{"title": chapter.split("\n")[0], "text": chapter[len(chapter.split("\n")[0]):].strip("\n").strip()} for chapter in chapters_list]
    
    #chapters_dict = [{"title": chapter.split("\n")[0], "text": chapter[len(chapter.split("\n")[0]):].strip("\n").strip()} for chapter in chapters_list]
    
    return pd.DataFrame.from_dict(chapters_dict)
    
print(read_book("The Adventures of Sherlock Holmes.txt"))

                                        title  \
0                    ﻿I. A SCANDAL IN BOHEMIA   
1                   II. THE RED-HEADED LEAGUE   
2                     III. A CASE OF IDENTITY   
3             IV. THE BOSCOMBE VALLEY MYSTERY   
4                     V. THE FIVE ORANGE PIPS   
5            VI. THE MAN WITH THE TWISTED LIP   
6    VII. THE ADVENTURE OF THE BLUE CARBUNCLE   
7    VIII. THE ADVENTURE OF THE SPECKLED BAND   
8   IX. THE ADVENTURE OF THE ENGINEER’S THUMB   
9      X. THE ADVENTURE OF THE NOBLE BACHELOR   
10     XI. THE ADVENTURE OF THE BERYL CORONET   
11   XII. THE ADVENTURE OF THE COPPER BEECHES   

                                                 text  
0   To Sherlock Holmes she is always _the_ woman. ...  
1   I had called upon my friend, Mr. Sherlock Holm...  
2   “My dear fellow,” said Sherlock Holmes as we s...  
3   We were seated at breakfast one morning, my wi...  
4   When I glance over my notes and records of the...  
5   Isa Whitney, brother o

In [157]:
import os
import shutil

def split_chapters_into_files(data, output_folder):
    book_path = output_folder
    
    if os.path.exists(book_path) and os.path.isdir(book_path): 
        shutil.rmtree(book_path)
    
    os.mkdir(book_path)
        
    for index, chapter in data.iterrows():
        
        with open(os.path.join(book_path, f'chapter_{str(index)}.txt'), "x") as file:
            file.write(chapter["text"])

book = read_book("The Adventures of Sherlock Holmes.txt")

split_chapters_into_files(book, "Chapters")

In [154]:
import zipfile

def compress_files(chapters_folder, zip_name, kind = "zip"):    
    chapter_files = [f for f in os.listdir(chapters_folder) if f.startswith("chapter") and f.endswith(".txt")]
    print(chapter_files)
    
    with zipfile.ZipFile(zip_name + f'.{kind}', 'w') as zipf:
            for chapter_file in chapter_files:
                zipf.write(os.path.join(chapters_folder, chapter_file), chapter_file)
    
compress_files("Chapters", "ababa")

['chapter_0.txt', 'chapter_1.txt', 'chapter_10.txt', 'chapter_11.txt', 'chapter_2.txt', 'chapter_3.txt', 'chapter_4.txt', 'chapter_5.txt', 'chapter_6.txt', 'chapter_7.txt', 'chapter_8.txt', 'chapter_9.txt']


In [155]:
def get_size(path):
    # Check if the path exists
    if not os.path.exists(path):
        print(f"Path '{path}' does not exist.")
    
    # If it's a directory, calculate the total size of all files inside
    if os.path.isdir(path):
        total_size = sum(os.path.getsize(os.path.join(path, f)) for f in os.listdir(path))
        print(f"The size of the folder '{path}' is {total_size / 1024:.2f} KB.")
        return total_size / 1024
    # If it's a file, get its size
    elif os.path.isfile(path):
        file_size = os.path.getsize(path)
        print(f"The size of the file '{path}' is {file_size / 1024:.2f} KB.")
        return file_size / 1024
    # If it's neither a file nor a directory, return an error message
    else:
        print(f"'{path}' is neither a file nor a directory.")
        return -1

In [162]:
book=read_book('The Adventures of Sherlock Holmes.txt')

split_chapters_into_files(book,'Chapters')

compress_files('Chapters','The Adventures of Sherlock Holmes',kind='zip')
compress_files( 'Chapters','The Adventures of Sherlock Holmes',kind='tar')

def test_get_size():
    # Check the size of the Chapters folder
    folder_size=get_size("Chapters")
    assert folder_size>=555 and folder_size<=565,f"Unexpected folder size:{folder_size}KB"
    # Check the size of the zip file
    zip_size=get_size("The Adventures of Sherlock Holmes.zip")
    assert zip_size>=555 and zip_size<=575,f"Unexpected zip file size:{zip_size}KB"
    # Check the size of the tar file
    tar_size=get_size("The Adventures of Sherlock Holmes.tar")
    assert tar_size>=555 and tar_size<=595,f"Unexpected tar file size:{tar_size}KB"
test_get_size()

['chapter_0.txt', 'chapter_1.txt', 'chapter_10.txt', 'chapter_11.txt', 'chapter_2.txt', 'chapter_3.txt', 'chapter_4.txt', 'chapter_5.txt', 'chapter_6.txt', 'chapter_7.txt', 'chapter_8.txt', 'chapter_9.txt']
['chapter_0.txt', 'chapter_1.txt', 'chapter_10.txt', 'chapter_11.txt', 'chapter_2.txt', 'chapter_3.txt', 'chapter_4.txt', 'chapter_5.txt', 'chapter_6.txt', 'chapter_7.txt', 'chapter_8.txt', 'chapter_9.txt']
The size of the folder 'Chapters' is 559.52 KB.
The size of the file 'The Adventures of Sherlock Holmes.zip' is 560.74 KB.
The size of the file 'The Adventures of Sherlock Holmes.tar' is 560.74 KB.
