In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
def type_extraction(price):
    """Classify a raw price string by the currency symbol it contains.

    Parameters
    ----------
    price : str
        Raw price text, e.g. ``"$87.25"``.

    Returns
    -------
    str
        ``"DOLLAR"`` when the text contains ``$``, ``"EURO"`` when it contains
        a euro sign (either a real ``€`` or the mis-decoded ``â‚¬`` sequence),
        and ``"UNKNOWN"`` for anything else, including non-string values.
    """
    # Non-string cells (e.g. missing values) do not support the `in` checks below.
    if not isinstance(price, str):
        return "UNKNOWN"
    if '$' in price:
        return "DOLLAR"
    # 'â‚¬' is the mis-decoded form of the euro sign; accept the real '€' as well.
    if 'â‚¬' in price or '€' in price:
        return "EURO"
    # The previous fallback called `.str` on a plain str, which raises
    # AttributeError; return an explicit label instead.
    return "UNKNOWN"

In [4]:
# Read the dataset from its JSON file and preview the first rows.
json_path = "datas/task1_cleaned.json"
df = pd.read_json(json_path)
df.head()

Unnamed: 0,id,title,author,genre,publisher,year,price
0,10292064894005717421,"Look Homeward, Angel",Prof. Teressa Kautzer,Humor,Brill Publishers,2010,$87.25
1,13029911509625386835,The Yellow Meads of Asphodel,Domingo Weimann,Reference book,Sams Publishing,2018,$31.99
2,12880574241579659568,A Catskill Eagle,Dayle Orn,Comic/Graphic Novel,Apress,2011,â‚¬5.99
3,13301315742612799364,Der Richter und sein Henker,Elias von Kolb,Tall tale,Centaurus Verlag,1995,$75.00
4,16372759776603821045,After Many a Summer Dies the Swan,Carter Legros,Metafiction,University of Minnesota Press,2004,$52.0


In [5]:
# Label every row with the currency detected in its raw price text.
currency_labels = df['price'].apply(type_extraction)
df['type'] = currency_labels

In [6]:
# Preview the first five rows, now including the new `type` column.
df.head(n=5)

Unnamed: 0,id,title,author,genre,publisher,year,price,type
0,10292064894005717421,"Look Homeward, Angel",Prof. Teressa Kautzer,Humor,Brill Publishers,2010,$87.25,DOLLAR
1,13029911509625386835,The Yellow Meads of Asphodel,Domingo Weimann,Reference book,Sams Publishing,2018,$31.99,DOLLAR
2,12880574241579659568,A Catskill Eagle,Dayle Orn,Comic/Graphic Novel,Apress,2011,â‚¬5.99,EURO
3,13301315742612799364,Der Richter und sein Henker,Elias von Kolb,Tall tale,Centaurus Verlag,1995,$75.00,DOLLAR
4,16372759776603821045,After Many a Summer Dies the Swan,Carter Legros,Metafiction,University of Minnesota Press,2004,$52.0,DOLLAR


In [7]:
# Strip currency symbols and commas so the remaining text can be parsed as a number.
# regex=False forces literal matching: '$' is a regex end-of-string anchor, and
# Series.str.replace defaulted to regex=True before pandas 2.0.
# NOTE(review): commas are removed outright, which assumes they are thousands
# separators rather than decimal commas — confirm against the data.
df['price'] = (
    df['price']
    .str.replace('â‚¬', '', regex=False)
    .str.replace('€', '', regex=False)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

In [8]:
# Convert the price column to 64-bit floats.
df['price'] = df['price'].astype('float64')

In [9]:
# Show the smallest value in the price column.
df['price'].min()

np.float64(4.0)

In [10]:
# Multiply the price of every row labelled EURO by 1.2.
# NOTE(review): 1.2 is presumably a fixed EUR->USD rate — confirm.
# Re-running this cell scales the same rows again.
is_euro = df['type'] == 'EURO'
df.loc[is_euro, 'price'] = df.loc[is_euro, 'price'] * 1.2

In [11]:
# Remove the `type` column from df in place.
df.drop(labels=['type'], axis='columns', inplace=True)

In [12]:
# Preview the first five rows after the `type` column has been dropped.
df.head(n=5)

Unnamed: 0,id,title,author,genre,publisher,year,price
0,10292064894005717421,"Look Homeward, Angel",Prof. Teressa Kautzer,Humor,Brill Publishers,2010,87.25
1,13029911509625386835,The Yellow Meads of Asphodel,Domingo Weimann,Reference book,Sams Publishing,2018,31.99
2,12880574241579659568,A Catskill Eagle,Dayle Orn,Comic/Graphic Novel,Apress,2011,7.188
3,13301315742612799364,Der Richter und sein Henker,Elias von Kolb,Tall tale,Centaurus Verlag,1995,75.0
4,16372759776603821045,After Many a Summer Dies the Swan,Carter Legros,Metafiction,University of Minnesota Press,2004,52.0


In [13]:
# Display the price column.
df['price']

0       87.250
1       31.990
2        7.188
3       75.000
4       52.000
         ...  
4998    72.500
4999    56.000
5000    80.388
5001    29.000
5002    97.500
Name: price, Length: 5003, dtype: float64

In [14]:
# Display the column labels of df.
df.columns

Index(['id', 'title', 'author', 'genre', 'publisher', 'year', 'price'], dtype='object')

In [17]:
# Per-year summary: number of books and mean price for each `year` value.
summary_table = (
    df.groupby('year')
    .agg(
        book_count=('id', 'count'),
        average_price=('price', 'mean'),
    )
    .reset_index()
)
summary_table['average_price'] = summary_table['average_price'].round(2)
summary_table = summary_table.rename(columns={'year': 'publication_year'})
# `drop` expects a boolean. The original passed the builtin `type`, which only
# worked because it is truthy.
summary_table = summary_table.sort_values('publication_year').reset_index(drop=True)
print(summary_table)


    publication_year  book_count  average_price
0               1871          43          48.08
1               1883          56          52.51
2               1886          54          54.73
3               1904          37          54.74
4               1905          59          50.62
5               1938          42          46.76
6               1955          49          54.83
7               1958          32          44.17
8               1986         104          45.63
9               1987         120          50.44
10              1988         153          49.91
11              1989         103          50.39
12              1990         122          52.12
13              1991          94          50.01
14              1992         101          50.46
15              1993         114          51.80
16              1994         131          50.55
17              1995         112          44.93
18              1996         111          54.12
19              1997         111        