TP CSV
===

In [None]:
import csv
from IPython.display import HTML, display
from operator import itemgetter, attrgetter

def import_csv2(filename, types=None):
    """
    Read a comma-separated UTF-8 file and return its rows as a list of dicts.

    The first line of the file supplies the dictionary keys (csv.DictReader).

    filename : path of the CSV file to open.
    types    : optional list of type names, one per column in file order,
               e.g. types=['str', 'int', 'float', 'str']. Columns tagged
               'int' or 'float' are converted with int() / float(); any other
               name (including 'str') leaves the value as the string read
               from the file. If the list is shorter or longer than the
               number of columns, only the positions present in both are
               considered.

    Raises ValueError (from int() / float()) when a value cannot be converted.
    """
    # Only these two type names trigger a conversion.
    casts = {'int': int, 'float': float}

    with open(filename, newline='', encoding='utf8') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')

        # No types given: hand back the raw string rows.
        if types is None:
            return list(reader)

        result = []
        for row in reader:
            # Pair each column with its declared type by position; zip stops
            # at the shorter of the two, so a surplus type name is ignored
            # instead of causing an IndexError.
            converted = {
                column: casts[type_name](text)
                for (column, text), type_name in zip(row.items(), types)
                if type_name in casts
            }
            row.update(converted)
            result.append(row)
        return result
        
def filter_colums(datas, list_of_column_names, action='keep'):
    """
    Return a copy of the rows restricted to (or stripped of) some columns.

    datas                : list of row dictionaries; it is not modified.
    list_of_column_names : the column names the action applies to.
    action               : 'remove' drops the listed columns; any other value
                           (default 'keep') keeps only the listed columns.

    Each row is filtered against its own keys, so rows that do not all share
    the same columns are handled without error. When datas is empty it is
    returned unchanged.
    """
    if not datas:
        return datas

    drop_listed = action == 'remove'
    result = []
    for row in datas:
        new_row = row.copy()
        # Iterate over the original row's keys while deleting from the copy.
        for column in row:
            listed = column in list_of_column_names
            # 'remove' deletes listed columns; 'keep' deletes unlisted ones.
            if listed == drop_listed:
                del new_row[column]
        result.append(new_row)
    return result

def filter_lines(datas, key, value, operator='='):
    """
    Return the rows whose column `key` matches `value` under `operator`.

    datas    : list of row dictionaries.
    key      : name of the column to test.
    value    : the reference value.
    operator : 'in' -> case-insensitive substring test (string columns);
               '>'  -> strictly greater than value;
               '<'  -> strictly less than value;
               anything else (default '=') -> equality.

    For '>', '<' and equality the type of the column is read from the first
    row: string columns are compared case-insensitively, and for equality on
    int / float columns `value` is converted with int() / float() first.

    An empty `datas` yields an empty list for every operator, so filters can
    be chained even when an earlier one matched nothing.
    """
    # The typed branches below look at datas[0]; guard the empty case first.
    if not datas:
        return []

    if operator == 'in':
        needle = value.lower()
        return [row for row in datas if needle in row[key].lower()]

    # The first row decides how the whole column is compared.
    sample = datas[0][key]

    if operator == '>':
        if isinstance(sample, str):
            bound = value.lower()
            return [row for row in datas if row[key].lower() > bound]
        return [row for row in datas if row[key] > value]

    if operator == '<':
        if isinstance(sample, str):
            bound = value.lower()
            return [row for row in datas if row[key].lower() < bound]
        return [row for row in datas if row[key] < value]

    # Every other operator value is treated as equality.
    if isinstance(sample, int):
        target = int(value)
    elif isinstance(sample, float):
        target = float(value)
    else:
        target = value.lower()
        return [row for row in datas if row[key].lower() == target]
    return [row for row in datas if row[key] == target]

def display_table(datas):
    """
    Show a list of row dictionaries as an HTML table in the notebook.

    The header cells are the keys of the first row, title-cased; every row
    becomes one table line built from its values. Nothing is returned: the
    markup is passed to display(HTML(...)). When datas is empty, the text
    'Rien a afficher' is displayed instead.

    Header and cell text is HTML-escaped so characters such as '<' or '&'
    coming from the data are shown literally rather than read as markup.
    """
    # Function-scope import so the block does not depend on the import cell.
    from html import escape

    if not datas:
        display(HTML('Rien a afficher'))
        return

    header = '</th><th>'.join(escape(k.title()) for k in datas[0].keys())
    body = '</tr><tr>'.join(
        '<td>{}</td>'.format('</td><td>'.join(escape(str(v)) for v in row.values()))
        for row in datas
    )
    display(HTML(
        '<table><thead><tr><th>{}</th></tr></thead><tbody><tr>{}</tr></tbody></table>'.format(
            header, body)
    ))

Importation des données depuis le fichier 'datas.csv'
---

In [5]:
from os import path

# Extract the archive only when the CSV is not already on disk.
if not path.exists('datas.csv'):
    import zipfile
    with zipfile.ZipFile('datas.zip', 'r') as archive:
        archive.extractall('.')

# The fifth column is converted to int; the first four stay as strings.
column_types = ['str', 'str', 'str', 'str', 'int']
dicos = import_csv2('datas.csv', types=column_types)

Tous les titres de Madonna entre 2000 et 2008. Colonnes affichées : titre, artiste, année.
---

In [6]:
# Madonna rows with a year strictly after 2000 and strictly before 2008
# (the '>' and '<' filters exclude the bounds themselves).
madonna_rows = filter_lines(dicos, 'artist_name', 'madonna')
after_2000 = filter_lines(madonna_rows, 'year', 2000, operator='>')
before_2008 = filter_lines(after_2000, 'year', 2008, operator='<')

# Drop the two columns that should not be displayed.
f = filter_colums(before_2008, ['song_id', 'release'], action='remove')
display_table(f)

Title,Artist_Name,Year
Sorry (Man With Guitar Mix),Madonna,2006
Get Together [Live] [Bonus Track],Madonna,2007
History (Non-Album Track),Madonna,2006
American Life [Headcleanr Rock Mix],Madonna,2003
Isaac [Live],Madonna,2007
Hollywood [Remix] [Live],Madonna,2006
Holiday [Live],Madonna,2006
Sorry [Live],Madonna,2007
I'm So Stupid (Album Version),Madonna,2003
Nobody Knows Me [Live],Madonna,2006


Les trier par année puis par titre

In [7]:
# Order by year first, then by title within the same year.
f = sorted(f, key=lambda row: (row['year'], row['title']))
display_table(f)

Title,Artist_Name,Year
American Life [Headcleanr Rock Mix],Madonna,2003
Easy Ride (Album Version),Madonna,2003
Hollywood (Album Version),Madonna,2003
I'm So Stupid (Album Version),Madonna,2003
Intervention (Album Version),Madonna,2003
Into The Hollywood Groove [featuring Missy Elliott] [The Passengerz Mix],Madonna,2003
Love Profusion (Album Version),Madonna,2003
Nobody Knows Me [Mount Sims Old School Mix],Madonna,2003
Nothing Fails (Album Version),Madonna,2003
Nothing Fails [Nevins Mix],Madonna,2003


Trouver tous les morceaux de l'artiste 'Tricky' de l'album 'blowback'.
---

In [8]:
# Narrow down to the artist first, then to the album.
tricky_rows = filter_lines(dicos, 'artist_name', 'tricky')
f = filter_lines(tricky_rows, 'release', 'blowback')
display_table(f)


Song_Id,Title,Release,Artist_Name,Year
SOGJNGD12A8C1384E2,A Song for Yukiko (Album version),Blowback,Tricky,2001
SOGVTEZ12A8C1384C3,Bury the Evidence (Album version),Blowback,Tricky,2001
SOFHDUX12A8C1384E9,The Hawkman Is Coming (Album version),Blowback,Tricky,2001
SOSEJLE12A8C13AFA3,Excess (Album version),Blowback,Tricky,2001
SOHHDAW12A8C1384BA,Your Name (Album version),Blowback,Tricky,2001
SOZKPKS12A8AE488A3,Something in the Way (Album version),Blowback,Tricky,2001
SOEDOHT12A8C13A2D1,Evolution Revolution Love (Album version),Blowback,Tricky,2001
SOYITYB12A8C1384ED,Evolution Revolution Love Remix (Album version),Blowback,Tricky,2001
SOCVFOB12A8C1384D8,Give It to 'Em (Album version),Blowback,Tricky,2001
SOJYVLV12A8C13A2E5,Girls (Album version),Blowback,Tricky,2001


Trouver tous les morceaux dont les artistes commencent par 'ab' et dont l'année est supérieure à 2005
---

In [9]:
# Artist names that sort strictly between 'ab' and 'ac', i.e. names that
# start with 'ab' (a name equal to 'ab' itself is excluded by the strict '>').
from_ab = filter_lines(dicos, 'artist_name', 'ab', operator='>')
ab_only = filter_lines(from_ab, 'artist_name', 'ac', operator='<')

# Keep only the rows whose year is strictly greater than 2005.
f = filter_lines(ab_only, 'year', 2005, operator='>')
display_table(f)

Song_Id,Title,Release,Artist_Name,Year
SOYBUYW12A8AE47481,The Haunting,Above The Golden State,Above The Golden State,2008
SOIHKHB12A8C134F00,I Reject!,Generator,Aborym,2006
SOFTCDB12A8C141527,Uptight,Do Whatever Turns You On,Aberfeldy,2006
SOVXNPH12AB01853E1,Dream Odyssey,100 A state Of Trance Tunes,Absolute,2007
SOEKFDZ12AB017C06F,L'Alchimiste,Gibraltar,Abd Al Malik,2006
SOZEFGL12A8AE46F74,Grotesque Modern Art,Programmed to Consume,Abysmal Dawn,2008
SORBEDT12A8AE47389,Enterrement Of An Idol,Strychnine.213,Aborted,2008
SOADXIK12A8C13868F,Impending Doom (Instrumental),From Ashes,Abysmal Dawn,2006
SOQOAXW12AC4687B11,Getting There,Don't Be So Mean,Abe Duque and Gennaro Lefosse,2009
SOUNNOY12AB017FB92,Gibraltar,Gibraltar,Abd Al Malik,2006


Les trier par nom d'artiste dans l'ordre décroissant.

In [10]:
# Artist names in descending order.
f = sorted(f, key=lambda row: row['artist_name'], reverse=True)
display_table(f)

Song_Id,Title,Release,Artist_Name,Year
SOZEFGL12A8AE46F74,Grotesque Modern Art,Programmed to Consume,Abysmal Dawn,2008
SOADXIK12A8C13868F,Impending Doom (Instrumental),From Ashes,Abysmal Dawn,2006
SOWREYM12A58A7B484,The Descent,Programmed to Consume,Abysmal Dawn,2008
SOFFGOR12AF72A34E5,Blacken The Sky,From Ashes,Abysmal Dawn,2006
SOVDERV12A8AE471C2,Twilight's Fallen,Programmed to Consume,Abysmal Dawn,2008
SOIBAES12A8C1386A9,State Of Mind,From Ashes,Abysmal Dawn,2006
SOLDIPI12A8AE46F76,A Remission of Life,Programmed to Consume,Abysmal Dawn,2008
SOHPYEI12AF72A24FF,In The Hands Of Death,From Ashes,Abysmal Dawn,2006
SODRVWF12A58A7DB20,Programmed to Consume,Programmed to Consume,Abysmal Dawn,2008
SOORFPO12A8C1386A0,Servants To Their Knees,From Ashes,Abysmal Dawn,2006
