In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import scipy as sp
from scipy import stats
warnings.filterwarnings('ignore')

1. Has the size of the sets changed over time?

In [176]:
# Read the LEGO sets table from the Kaggle input directory and preview it.
sets_csv_path = "/kaggle/input/lego-database/sets.csv"
df_sets = pd.read_csv(sets_csv_path)
df_sets.head()

Unnamed: 0,set_num,name,year,theme_id,num_parts
0,00-1,Weetabix Castle,1970,414,471
1,0011-2,Town Mini-Figures,1978,84,12
2,0011-3,Castle 2 for 1 Bonus Offer,1987,199,2
3,0012-1,Space Mini-Figures,1979,143,12
4,0013-1,Space Mini-Figures,1979,143,12


In [177]:
# Four sets with the highest part count, largest first.
df_sets.sort_values('num_parts', ascending=False).iloc[:4]

Unnamed: 0,set_num,name,year,theme_id,num_parts
170,10189-1,Taj Mahal,2008,276,5922
11614,SWMP-1,Star Wars / M&M Mosaic - Promo Set,2005,169,5461
1337,2000409-1,Window Exploration Bag,2010,507,5200
161,10179-1,Millennium Falcon - UCS,2007,174,5195


In [189]:
# Per-year summary of the sets table: mean and standard deviation of the part
# count, number of distinct themes and number of distinct sets.
# String reducer names replace np.mean / np.std / pd.Series.nunique: pandas
# deprecates passing those callables to `agg`, and the strings select the
# pandas reductions that were already being used (so the numbers are unchanged).
df_sets_per_year = df_sets.groupby('year').agg(Mean=('num_parts', 'mean'),
                                               Std=('num_parts', 'std'),
                                               Theme_nbr=('theme_id', 'nunique'),
                                               Set_nbr=('set_num', 'nunique'))

# Scatter plot of the yearly mean set size against the year (the index).
Fig_1 = px.scatter(df_sets_per_year,
                   x=df_sets_per_year.index,
                   y='Mean',
                   labels={'year': 'Year',
                           'Mean': "Average number of part",
                           'Theme_nbr': 'Number of theme'},
                   title="Evolution of the mean size of LEGO's set from 1950",
                   height=600,
                   width=1200)

Fig_1.update_traces(marker=dict(size=10))

Fig_1.show()

# `p` was used below without being defined anywhere in the notebook, so this
# cell raised NameError on a fresh kernel. It is now computed here as the
# two-sided p-value for a zero slope in a linear regression of the yearly mean
# part count on the year.
# NOTE(review): the statistical test originally used is not visible in the
# notebook; confirm that a linear-trend test is the intended one.
year_trend = stats.linregress(df_sets_per_year.index.to_numpy(dtype=float),
                              df_sets_per_year['Mean'].to_numpy(dtype=float))
p = year_trend.pvalue

# Report the outcome at the conventional 5% significance level.
if p < 0.05:
    print("The evolution of the size of LEGO's set over time is significant.")
else:
    print("There has not been significant changes in the size of LEGO's set over time.")

The evolution of the size of LEGO's set over time is significant.


In [186]:
# Bubble chart of the yearly `Set_nbr` column against the year (the index);
# marker colour and marker size both encode the `Theme_nbr` column.
Fig_1 = px.scatter(
    df_sets_per_year,
    x=df_sets_per_year.index,
    y='Set_nbr',
    size='Theme_nbr',
    color='Theme_nbr',
    color_continuous_scale=px.colors.sequential.Viridis,
    title="Number of sets per year",
    labels={
        'year': 'Year',
        'Set_nbr': 'Number of sets',
        'Theme_nbr': "Number of themes",
    },
    width=1200,
    height=600,
)
Fig_1.show()

In [188]:
# One marker per year: `Theme_nbr` on x, `Set_nbr` on y; the `Mean` column
# drives both the marker colour and the marker size.
Fig_1 = px.scatter(
    df_sets_per_year,
    x='Theme_nbr',
    y='Set_nbr',
    size='Mean',
    color='Mean',
    labels={
        'Theme_nbr': 'Number of themes',
        'Set_nbr': 'Number of sets',
        'Mean': "Average number of parts",
    },
    width=800,
    height=600,
)
Fig_1.show()

In [121]:
# Read the themes table from the Kaggle input directory and preview it.
themes_csv_path = "/kaggle/input/lego-database/themes.csv"
df_themes = pd.read_csv(themes_csv_path)
df_themes.head()

Unnamed: 0,id,name,parent_id
0,1,Technic,
1,2,Arctic Technic,1.0
2,3,Competition,1.0
3,4,Expert Builder,1.0
4,5,Model,1.0


In [149]:
# Attach the theme name to every set (default inner join on the theme id),
# drop the id columns that are no longer needed, and give the two `name`
# columns produced by the merge readable labels.
df_merged = (
    df_sets
    .merge(df_themes, left_on='theme_id', right_on='id')
    .drop(columns=['theme_id', 'id', 'parent_id'])
    .rename(columns={'name_x': 'set', 'name_y': 'theme'})
)
df_merged.head()

Unnamed: 0,set,year,num_parts,theme
0,Weetabix Castle,1970,471,Castle
1,Town Mini-Figures,1978,12,Supplemental
2,Castle 2 for 1 Bonus Offer,1987,2,Lion Knights
3,Space Mini-Figures,1979,12,Supplemental
4,Space Mini-Figures,1979,12,Supplemental


In [130]:
# Sort the merged frame by year in place; later cells therefore see the sorted
# frame. Presumably done so the animation frames appear in chronological order.
df_merged.sort_values('year', inplace=True)

# Animated bar chart with one frame per year: part count per theme, each bar
# segment labelled with the set name.
fig = px.bar(
    df_merged,
    x='theme',
    y='num_parts',
    text='set',
    animation_frame='year',
)
fig.show()

In [133]:
# Rows of the merged frame whose year is 1955.
df_merged[df_merged['year'].eq(1955)]

Unnamed: 0,set,year,num_parts,theme
648,2 x 2 Bricks,1955,80,Supplemental
646,2 x 3 Bricks,1955,60,Supplemental
644,2 x 4 Bricks,1955,52,Supplemental
650,1 x 2 Bricks,1955,104,Supplemental
653,1 x 1 Bricks,1955,160,Supplemental
693,8 Road Signs,1955,8,Supplemental
706,Esso Pumps/Sign,1955,2,Supplemental
655,1 x 1 Round Bricks,1955,200,Supplemental
696,8 Road Signs,1955,8,Supplemental
642,4 x 4 Corner Bricks,1955,20,Supplemental


In [111]:
# Mean and standard deviation of the part count per theme, keeping the 15
# themes with the highest mean.
sets_size_theme = (
    df_merged
    .groupby(['theme'])
    .agg(Mean=('num_parts', 'mean'), Std=('num_parts', 'std'))
    .sort_values(by='Mean', ascending=False)
    .iloc[:15]
)

# One marker per theme at its mean part count, with the standard deviation
# drawn as a horizontal error bar.
Fig = px.scatter(
    sets_size_theme,
    x=sets_size_theme['Mean'],
    y=sets_size_theme.index,
    color=sets_size_theme.index,
    error_x='Std',
)
Fig.update_traces(marker={'size': 20})
Fig.show()

In [55]:
# Mean part count per theme, largest first.
# Note: this rebinds `sets_size_theme`, now with a single `num_parts` column.
sets_size_theme = (
    df_merged
    .groupby('theme')[['num_parts']]
    .mean()
    .sort_values('num_parts', ascending=False)
)
sets_size_theme

Unnamed: 0_level_0,num_parts
theme,Unnamed: 1_level_1
Modular Buildings,2350.583333
Mosaic,1843.250000
Sculptures,1716.695652
Ultimate Collector Series,1680.333333
FIRST LEGO League,1387.000000
...,...
Power Functions,1.882353
Imperial Guards,1.000000
Key Chain,0.181818
Samsonite,0.000000


In [None]:
# Read the parts table from the Kaggle input directory and preview it.
parts_csv_path = "/kaggle/input/lego-database/parts.csv"
df_parts = pd.read_csv(parts_csv_path)
df_parts.head()

In [None]:
# Load every table of the LEGO database in one pass. The targets on the left
# pair up positionally with the CSV paths on the right, and the files are read
# in the listed order.
(
    df_color,
    df_sets,
    df_themes,
    df_inventory_sets,
    df_inventories,
    df_inventory_parts,
    df_parts,
    df_part_categories,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/lego-database/colors.csv",
        "/kaggle/input/lego-database/sets.csv",
        "/kaggle/input/lego-database/themes.csv",
        "/kaggle/input/lego-database/inventory_sets.csv",
        "/kaggle/input/lego-database/inventories.csv",
        "/kaggle/input/lego-database/inventory_parts.csv",
        "/kaggle/input/lego-database/parts.csv",
        "/kaggle/input/lego-database/part_categories.csv",
    )
)

In [None]:
# Re-read the themes table (the same file was already loaded above) and preview it.
themes_csv_path = "/kaggle/input/lego-database/themes.csv"
df_themes = pd.read_csv(themes_csv_path)
df_themes.head()

In [None]:
# Preview the first five rows of the inventory-sets table.
df_inventory_sets.head(n=5)

2. Which colors are associated with each theme?