In [957]:
import pandas as pd

In [958]:
df_lego = pd.read_csv('../00_CSV_Descargado/df_lego_work.csv')

df_lego.head()

Unnamed: 0,SetID,Number,YearFrom,Category,Theme,Subtheme,SetName,ImageFilename,USRetailPrice,Pieces,...,AgeMin,AgeMax,OwnCount,WantCount,Released,Rating,BrickLinkSoldPriceNew,BrickLinkSoldPriceUsed,LaunchDate,ExitDate
0,7530,10,1973,Normal,Duplo,,Pre-School Set,,,17.0,...,,,16,22,X,0,,,,
1,7531,20,1973,Normal,Duplo,,Building Set,,,25.0,...,,,17,24,X,0,,,,
2,1119,28,1979,Normal,Duplo,,Nursery Furniture,028-1,,7.0,...,,,50,31,X,0,,11.72,,
3,7532,30,1973,Normal,Duplo,,Building Set,,,29.0,...,,,17,23,X,0,,,,
4,1382,32,1979,Normal,Duplo,,Living Room Furniture,032-1,,14.0,...,,,69,27,X,0,,10.52,,


***Antes de crear columnas voy a limpiar los nulos y las columnas***

In [959]:
null_counts = df_lego.isnull().sum()

In [960]:
# Veo solo las columnas que tienen valores nulos
null_counts = null_counts[null_counts > 0]

In [961]:
null_counts

Subtheme                  1448
ImageFilename              486
USRetailPrice             2741
Pieces                     301
Minifigs                  3322
Width                     3049
Height                    3050
Depth                     3053
Weight                    6334
AgeMin                    3587
AgeMax                    7160
BrickLinkSoldPriceNew     1357
BrickLinkSoldPriceUsed    2464
LaunchDate                3380
ExitDate                  3410
dtype: int64

In [962]:
# Reemplazo los valores nulos en 'Subtheme' por 'Unknown'
df_lego['Subtheme'] = df_lego['Subtheme'].fillna('Unknown')

In [None]:
# Reemplazo nulos por 0 en las columnas numéricas seleccionadas
columns_zero = [
    'Pieces', 'BrickLinkSoldPriceNew', 'BrickLinkSoldPriceNewUS','USRetailPrice',
    'BrickLinkSoldPriceUsed', 'Depth', 'Height', 'Width', 'Weight', 'Minifigs', 'AgeMin', 'AgeMax'
]

for col in columns_zero:
    if col in df_lego.columns:
        df_lego[col] = df_lego[col].fillna(0)

In [964]:
# Reemplazo los valores nulos en 'ImageFilename' por 'Unknown'
df_lego['ImageFilename'] = df_lego['ImageFilename'].fillna('Unknown')

In [965]:
# Convierto a formato de fecha para manejar valores nulos
df_lego['LaunchDate'] = pd.to_datetime(df_lego['LaunchDate'], errors='coerce')
df_lego['ExitDate'] = pd.to_datetime(df_lego['ExitDate'], errors='coerce')

# Calculo la duración en años de los sets con datos disponibles
df_lego['Duration'] = (df_lego['ExitDate'] - df_lego['LaunchDate']).dt.days / 365.25

# Calculo la mediana de duración por Theme
theme_median_duration = df_lego.groupby('Theme')['Duration'].median()

  df_lego['ExitDate'] = pd.to_datetime(df_lego['ExitDate'], errors='coerce')


In [966]:
# Relleno ExitDate usando la mediana de duración por Theme
for theme, median_duration in theme_median_duration.items():
    mask = (df_lego['Theme'] == theme) & df_lego['ExitDate'].isna() & df_lego['LaunchDate'].notna()
    df_lego.loc[mask, 'ExitDate'] = df_lego.loc[mask, 'LaunchDate'] + pd.to_timedelta(median_duration * 365.25, unit='D')

# Relleno LaunchDate usando YearFrom para los valores NaN
mask_launch = df_lego['LaunchDate'].isna() & df_lego['YearFrom'].notna()
df_lego.loc[mask_launch, 'LaunchDate'] = pd.to_datetime(df_lego.loc[mask_launch, 'YearFrom'].astype(int).astype(str) + '-01-01')

In [967]:
# Extraigo año y mes en nuevas columnas
df_lego['LaunchYear'] = df_lego['LaunchDate'].dt.year
df_lego['LaunchMonth'] = df_lego['LaunchDate'].dt.month
df_lego['ExitYear'] = df_lego['ExitDate'].dt.year
df_lego['ExitMonth'] = df_lego['ExitDate'].dt.month

# Elimino las columnas originales y la auxiliar
df_lego.drop(columns=['LaunchDate', 'ExitDate', 'Duration'], inplace=True)

In [968]:
# Calculamos la duración en años de los sets con datos disponibles
df_lego['Duration'] = df_lego['ExitYear'] - df_lego['LaunchYear']

# Calculo la duración media por tema, ignorando NaN
theme_avg_duration = df_lego.groupby('Theme')['Duration'].mean()

# Calculo la duración media por año de lanzamiento, ignorando NaN
year_avg_duration = df_lego.groupby('LaunchYear')['Duration'].mean()

In [969]:
# Relleno los valores nulos de ExitYear y ExitMonth usando valores calculados
for index, row in df_lego.iterrows():
    if pd.isna(row['ExitYear']) and not pd.isna(row['LaunchYear']):
        theme_duration = theme_avg_duration.get(row['Theme'], None)
        year_duration = year_avg_duration.get(row['LaunchYear'], None)

        # Usar la duración del tema si está disponible, si no, la del año de lanzamiento
        estimated_duration = theme_duration if pd.notna(theme_duration) else year_duration

        if pd.notna(estimated_duration):  # Solo asignar si hay un valor válido
            df_lego.at[index, 'ExitYear'] = int(row['LaunchYear'] + round(estimated_duration))
            df_lego.at[index, 'ExitMonth'] = 12  # Usar diciembre como mes estimado de retiro

# Elimino de nuevo la columna auxiliar de duración
df_lego.drop(columns=['Duration'], inplace=True)

In [970]:
df_lego.head()

Unnamed: 0,SetID,Number,YearFrom,Category,Theme,Subtheme,SetName,ImageFilename,USRetailPrice,Pieces,...,OwnCount,WantCount,Released,Rating,BrickLinkSoldPriceNew,BrickLinkSoldPriceUsed,LaunchYear,LaunchMonth,ExitYear,ExitMonth
0,7530,10,1973,Normal,Duplo,Unknown,Pre-School Set,Unknown,0.0,17.0,...,16,22,X,0,0.0,0.0,1973,1,1974.0,12.0
1,7531,20,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,25.0,...,17,24,X,0,0.0,0.0,1973,1,1974.0,12.0
2,1119,28,1979,Normal,Duplo,Unknown,Nursery Furniture,028-1,0.0,7.0,...,50,31,X,0,0.0,11.72,1979,1,1980.0,12.0
3,7532,30,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,29.0,...,17,23,X,0,0.0,0.0,1973,1,1974.0,12.0
4,1382,32,1979,Normal,Duplo,Unknown,Living Room Furniture,032-1,0.0,14.0,...,69,27,X,0,0.0,10.52,1979,1,1980.0,12.0


**Reviso los tipos de datos**

In [971]:
df_lego.dtypes


SetID                       int64
Number                     object
YearFrom                    int64
Category                   object
Theme                      object
Subtheme                   object
SetName                    object
ImageFilename              object
USRetailPrice             float64
Pieces                    float64
Minifigs                  float64
PackagingType              object
Availability               object
Width                     float64
Height                    float64
Depth                     float64
Weight                    float64
AgeMin                    float64
AgeMax                    float64
OwnCount                    int64
WantCount                   int64
Released                   object
Rating                      int64
BrickLinkSoldPriceNew     float64
BrickLinkSoldPriceUsed    float64
LaunchYear                  int32
LaunchMonth                 int32
ExitYear                  float64
ExitMonth                 float64
dtype: object

In [972]:
# Convertimos ExitYear y ExitMonth a enteros
df_lego['ExitYear'] = df_lego['ExitYear'].astype(int)
df_lego['ExitMonth'] = df_lego['ExitMonth'].astype(int)

In [973]:
# Elimino la columna 'Released' porque no aporta información útil
df_lego.drop(columns=['Released'], inplace=True)


In [974]:
df_lego['PackagingType'].value_counts()

PackagingType
Box                      5260
Foil pack                1232
{Not specified}          1169
Polybag                   769
Paper bag                 201
Blister pack              115
Bucket                     86
None (loose parts)         79
Tub                        64
Plastic box                63
Box with handle            33
Other                      32
Plastic canister           31
Zip-lock bag               17
Shrink-wrapped             15
Canister                   14
Metal canister             14
Box with backing card      11
Name: count, dtype: int64

In [975]:
# Normalizo 'PackagingType'
df_lego['PackagingType'] = df_lego['PackagingType'].replace({
    '{Not specified}': 'Unknown',
    'Plastic canister': 'Canister',
    'Plastic box': 'Box',
    'Metal canister': 'Canister',
    'Box with handle': 'Box',
    'Box with backing card': 'Box',
    'None (loose parts)': 'None'
})

In [976]:
df_lego['Availability'].value_counts()

Availability
Retail                   5182
{Not specified}          1507
Retail - limited         1349
LEGO exclusive            580
Promotional               418
Educational                93
Promotional (Airline)      27
Unknown                    21
Not sold                   21
LEGOLAND exclusive          7
Name: count, dtype: int64

In [977]:
# Normalizo 'Availability'
df_lego['Availability'] = df_lego['Availability'].replace({
    '{Not specified}': 'Unknown',
    'Promotional (Airline)': 'Promotional'
})

In [978]:
# Reasigno los sets de "Creator Expert" al tema "Icons". Es una serie que ha cambiado de nombre últimamente y puede hacer que los datos no sean consistentes.
df_lego.loc[df_lego['Theme'] == 'Creator Expert', 'Theme'] = 'Icons'

***FUNCION LIMPIEZA Y NORMALIZACIÓN***

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

def clean_lego_data(df_lego):
    # Reemplazo los valores nulos en 'Subtheme' por 'Unknown'
    df_lego['Subtheme'] = df_lego['Subtheme'].fillna('Unknown')

    # Reemplazo nulos por 0 en las columnas numéricas seleccionadas
    columns_zero = [
        'Pieces', 'BrickLinkSoldPriceNew', 'BrickLinkSoldPriceNewUS', 'USRetailPrice',
        'BrickLinkSoldPriceUsed', 'Depth', 'Height', 'Width', 'Weight', 'Minifigs', 'AgeMin', 'AgeMax'
    ]

    for col in columns_zero:
        if col in df_lego.columns:
            df_lego[col] = df_lego[col].fillna(0)

    # Reemplazo los valores nulos en 'ImageFilename' por 'Unknown'
    df_lego['ImageFilename'] = df_lego['ImageFilename'].fillna('Unknown')

    # Convierto a formato de fecha para manejar valores nulos
    df_lego['LaunchDate'] = pd.to_datetime(df_lego['LaunchDate'], errors='coerce')
    df_lego['ExitDate'] = pd.to_datetime(df_lego['ExitDate'], errors='coerce')

    # Calcula la duración en años de los sets con datos disponibles
    df_lego['Duration'] = (df_lego['ExitDate'] - df_lego['LaunchDate']).dt.days / 365.25

    # Calcular la mediana de duración por Theme
    theme_median_duration = df_lego.groupby('Theme')['Duration'].median()

    # Relleno ExitDate usando la mediana de duración por Theme
    for theme, median_duration in theme_median_duration.items():
        mask = (df_lego['Theme'] == theme) & df_lego['ExitDate'].isna() & df_lego['LaunchDate'].notna()
        df_lego.loc[mask, 'ExitDate'] = df_lego.loc[mask, 'LaunchDate'] + pd.to_timedelta(median_duration * 365.25, unit='D')

    # Relleno LaunchDate usando YearFrom para los valores NaN
    mask_launch = df_lego['LaunchDate'].isna() & df_lego['YearFrom'].notna()
    df_lego.loc[mask_launch, 'LaunchDate'] = pd.to_datetime(df_lego.loc[mask_launch, 'YearFrom'].astype(int).astype(str) + '-01-01')

    # Extraigo año y mes en nuevas columnas
    df_lego['LaunchYear'] = df_lego['LaunchDate'].dt.year
    df_lego['LaunchMonth'] = df_lego['LaunchDate'].dt.month
    df_lego['ExitYear'] = df_lego['ExitDate'].dt.year
    df_lego['ExitMonth'] = df_lego['ExitDate'].dt.month

    # Elimino las columnas originales y la auxiliar
    df_lego.drop(columns=['LaunchDate', 'ExitDate', 'Duration'], inplace=True)

    # Calculamos la duración en años de los sets con datos disponibles
    df_lego['Duration'] = df_lego['ExitYear'] - df_lego['LaunchYear']

    # Calculo la duración media por tema, ignorando NaN
    theme_avg_duration = df_lego.groupby('Theme')['Duration'].mean()

    # Calculo la duración media por año de lanzamiento, ignorando NaN
    year_avg_duration = df_lego.groupby('LaunchYear')['Duration'].mean()

    # Relleno los valores nulos de ExitYear y ExitMonth usando valores calculados
    for index, row in df_lego.iterrows():
        if pd.isna(row['ExitYear']) and not pd.isna(row['LaunchYear']):
            theme_duration = theme_avg_duration.get(row['Theme'], None)
            year_duration = year_avg_duration.get(row['LaunchYear'], None)

            # Usar la duración del tema si está disponible, si no, la del año de lanzamiento
            estimated_duration = theme_duration if pd.notna(theme_duration) else year_duration

            if pd.notna(estimated_duration):  # Solo asignar si hay un valor válido
                df_lego.at[index, 'ExitYear'] = int(row['LaunchYear'] + round(estimated_duration))
                df_lego.at[index, 'ExitMonth'] = 12  # Usar diciembre como mes estimado de retiro

    # Elimino de nuevo la columna auxiliar de duración
    df_lego.drop(columns=['Duration'], inplace=True)

    # Normalizo 'PackagingType'
    df_lego['PackagingType'] = df_lego['PackagingType'].replace({
        '{Not specified}': 'Unknown',
        'Plastic canister': 'Canister',
        'Plastic box': 'Box',
        'Metal canister': 'Canister',
        'Box with handle': 'Box',
        'Box with backing card': 'Box',
        'None (loose parts)': 'None'
    })

    # Normalizo 'Availability'
    df_lego['Availability'] = df_lego['Availability'].replace({
        '{Not specified}': 'Unknown',
        'Promotional (Airline)': 'Promotional'
    })
    # Reasigno los sets de "Creator Expert" al tema "Icons". Es una serie que ha cambiado de nombre últimamente y puede hacer que los datos no sean consistentes.
    df_lego.loc[df_lego['Theme'] == 'Creator Expert', 'Theme'] = 'Icons'

    return df_lego

***FUNCION NORMALIZACIÓN***

In [980]:
df_lego.to_csv("df_lego_work_limpio.csv", index=False)


In [981]:
df_lego

Unnamed: 0,SetID,Number,YearFrom,Category,Theme,Subtheme,SetName,ImageFilename,USRetailPrice,Pieces,...,AgeMax,OwnCount,WantCount,Rating,BrickLinkSoldPriceNew,BrickLinkSoldPriceUsed,LaunchYear,LaunchMonth,ExitYear,ExitMonth
0,7530,10,1973,Normal,Duplo,Unknown,Pre-School Set,Unknown,0.0,17.0,...,0.0,16,22,0,0.0,0.00,1973,1,1974,12
1,7531,20,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,25.0,...,0.0,17,24,0,0.0,0.00,1973,1,1974,12
2,1119,28,1979,Normal,Duplo,Unknown,Nursery Furniture,028-1,0.0,7.0,...,0.0,50,31,0,0.0,11.72,1979,1,1980,12
3,7532,30,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,29.0,...,0.0,17,23,0,0.0,0.00,1973,1,1974,12
4,1382,32,1979,Normal,Duplo,Unknown,Living Room Furniture,032-1,0.0,14.0,...,0.0,69,27,0,0.0,10.52,1979,1,1980,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9200,22656,WISHINGWELL,2013,Other,Friends,Promotional,Wishing Well,WISHINGWELL-1,0.0,28.0,...,0.0,216,506,0,0.0,0.00,2013,1,2014,12
9201,30208,XWING,2019,Other,Star Wars,Promotional,X-wing Trench Run,XWING-2,0.0,52.0,...,0.0,133,747,0,14.7,21.16,2019,1,2020,12
9202,29327,XWING,2019,Other,Star Wars,Promotional,Mini X-wing Fighter,XWING-1,0.0,60.0,...,0.0,141,582,0,0.0,0.00,2019,1,2020,12
9203,22978,YODA,2013,Other,Star Wars,Promotional,"Yoda minifig, NY I Heart Torso",YODA-1,0.0,3.0,...,0.0,249,1967,4,2826.3,0.00,2013,1,2014,12


**Comienzo el proceso de creación de columnas**

In [982]:
import pandas as pd
from datetime import datetime

# Obtengo el año actual
current_year = datetime.now().year

# Calculo los años desde la retirada del set
df_lego['YearsSinceExit'] = current_year - df_lego['ExitYear']

In [None]:
# Reemplazo valores negativos con 0
df_lego['YearsSinceExit'] = df_lego['YearsSinceExit'].apply(lambda x: max(x, 0))


In [984]:
# Reemplazo valores NaN por 0 antes de la conversión
df_lego['YearsSinceExit'] = df_lego['YearsSinceExit'].fillna(0).astype(int)


In [985]:
# Creo la columna 'PriceChange' para calcular porcentaje del cambio de precio entre el precio de venta en BrickLink y el precio de venta el la web de Lego en EEUU
df_lego['PriceChange'] = ((df_lego['BrickLinkSoldPriceNew'] - df_lego['USRetailPrice']) / df_lego['USRetailPrice']) * 100
df_lego['PriceChange'] = df_lego['PriceChange'].fillna(0)  # Reemplazar nulos por 0 porque hay precios que no tenemos disponibles

In [986]:
df_lego['ResaleDemand'] = df_lego.apply(lambda row: row['BrickLinkSoldPriceNew'] / row['BrickLinkSoldPriceUsed'] if row['BrickLinkSoldPriceUsed'] > 0 else 0, axis=1)
df_lego['AppreciationTrend'] = df_lego.apply(lambda row: row['PriceChange'] / row['YearsSinceExit'] if row['YearsSinceExit'] > 0 else 0, axis=1)

In [987]:
#Creo una columna para indicar si el tamaño del set es pequeño, mediano o grande
size_conditions = [
    (df_lego['Pieces'] < 250),
    (df_lego['Pieces'].between(250, 1000)),
    (df_lego['Pieces'] > 1000)
]
size_labels = ['Small', 'Medium', 'Large']
df_lego['SizeCategory'] = pd.cut(df_lego['Pieces'], bins=[0, 249, 1000, float('inf')], labels=size_labels, include_lowest=True)

In [988]:
# Definimos sets exclusivos según categorías que mejor revalorización tienen
exclusive_themes = ['Star Wars', 'Modular Buildings', 'Icons', 'Ideas', 'Creator Expert', 'Harry Potter', 'Marvel Super Heroes','Ghostbusters','Icons','The Lord of the Rings','Pirates of the Caribbean','Pirates','Trains','Architecture']
df_lego['Exclusivity'] = df_lego['Theme'].apply(lambda x: 'Exclusive' if x in exclusive_themes else 'Regular')

In [989]:
import numpy as np

In [990]:
# Calculo ThemePopularity evitando divisiones por cero o valores infinitos
theme_popularity = df_lego.groupby('Theme')['PriceChange'].mean().replace([np.inf, -np.inf], np.nan)
df_lego['ThemePopularity'] = df_lego['Theme'].map(theme_popularity).fillna(0)

In [991]:
# Calculamos InvestmentScore asegurando que no haya valores inf o NaN originados por PriceChange
df_lego['InvestmentScore'] = df_lego.apply(lambda row: (row['PriceChange'] * 0.4) +
                                                     (row['AppreciationTrend'] * 0.3) +
                                                     (row['ThemePopularity'] * 0.2) +
                                                     (10 if row['Exclusivity'] == 'Exclusive' else 0), axis=1)


In [992]:
df_lego['InvestmentScore'].unique()

array([  0.        ,          inf, -40.63829787, ..., -31.76470588,
       -33.33333333, -34.28571429])

In [993]:
# Calculamos el incremento de precio anual desde que el set fue retirado
df_lego['AnnualPriceIncrease'] = (df_lego['BrickLinkSoldPriceNew'] - df_lego['USRetailPrice']) / df_lego['YearsSinceExit']

# Reemplazamos valores infinitos o NaN (por si hay sets con YearsSinceExit = 0)
df_lego.replace([np.inf, -np.inf], np.nan, inplace=True)
df_lego['AnnualPriceIncrease'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_lego['AnnualPriceIncrease'].fillna(0, inplace=True)


In [None]:
# Calculamos el porcentaje de aumento anual del precio desde que el set fue retirado
df_lego['AnnualPercentageIncrease'] = ((df_lego['BrickLinkSoldPriceNew'] - df_lego['USRetailPrice']) /
                                       (df_lego['USRetailPrice'] * df_lego['YearsSinceExit'])) * 100

# Reemplazamos valores infinitos o NaN (por si hay YearsSinceExit o USRetailPrice en 0)
df_lego.replace([np.inf, -np.inf], np.nan, inplace=True)
df_lego['AnnualPercentageIncrease'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_lego['AnnualPercentageIncrease'].fillna(0, inplace=True)


In [None]:
# Selecciono los temas definitivos a estudiar en el trabajo
selected_themes = [
    "Speed Champions", "Architecture", "BrickHeadz", "Star Wars", "Ideas", "Collectable Minifigures",
    "Technic", "Minecraft", "Harry Potter", "Icons", "Ninjago", "Education", "Jurassic World",
    "Duplo", "DC Comics Super Heroes", "Marvel Super Heroes", "Creator", "City", "Friends",
    "Classic", "Disney"
]

# Filtro el dataframe para que solo contenga los temas seleccionados
df_lego_final = df_lego[df_lego['Theme'].isin(selected_themes)].copy()


In [996]:
df_lego_final.to_csv("df_lego_final.csv", index=False)


In [None]:
df_lego_final

In [None]:
# Convierto ExitYear a formato numérico en caso de ser necesario
df_lego_final['ExitYear'] = pd.to_numeric(df_lego_final['ExitYear'], errors='coerce')

# Creo el dataframe con sets retirados (ExitYear < 2025)
df_lego_final_retirados = df_lego_final[df_lego_final['ExitYear'] < 2025].copy()

# Creo el dataframe con sets aún en venta (ExitYear >= 2025)
df_lego_final_venta = df_lego_final[df_lego_final['ExitYear'] >= 2025].copy()


In [1026]:
df_lego_final_retirados.to_csv("df_lego_final_retirados.csv", index=False)
df_lego_final_venta.to_csv("df_lego_final_venta.csv", index=False)


In [1027]:
df_lego_final_retirados

Unnamed: 0,SetID,Number,YearFrom,Category,Theme,Subtheme,SetName,ImageFilename,USRetailPrice,Pieces,...,YearsSinceExit,PriceChange,ResaleDemand,AppreciationTrend,SizeCategory,Exclusivity,ThemePopularity,InvestmentScore,AnnualPriceIncrease,AnnualPercentageIncrease
0,7530,10,1973,Normal,Duplo,Unknown,Pre-School Set,Unknown,0.0,17.0,...,51,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
1,7531,20,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,25.0,...,51,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
2,1119,28,1979,Normal,Duplo,Unknown,Nursery Furniture,028-1,0.0,7.0,...,45,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
3,7532,30,1973,Normal,Duplo,Unknown,Building Set,Unknown,0.0,29.0,...,51,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
4,1382,32,1979,Normal,Duplo,Unknown,Living Room Furniture,032-1,0.0,14.0,...,45,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9200,22656,WISHINGWELL,2013,Other,Friends,Promotional,Wishing Well,WISHINGWELL-1,0.0,28.0,...,11,0.0,0.000000,0.0,Small,Regular,0.0,0.0,0.000000,0.0
9201,30208,XWING,2019,Other,Star Wars,Promotional,X-wing Trench Run,XWING-2,0.0,52.0,...,5,,0.694707,,Small,Exclusive,0.0,,2.940000,0.0
9202,29327,XWING,2019,Other,Star Wars,Promotional,Mini X-wing Fighter,XWING-1,0.0,60.0,...,5,0.0,0.000000,0.0,Small,Exclusive,0.0,10.0,0.000000,0.0
9203,22978,YODA,2013,Other,Star Wars,Promotional,"Yoda minifig, NY I Heart Torso",YODA-1,0.0,3.0,...,11,,0.000000,,Small,Exclusive,0.0,,256.936364,0.0


In [1028]:
df_lego_final_venta

Unnamed: 0,SetID,Number,YearFrom,Category,Theme,Subtheme,SetName,ImageFilename,USRetailPrice,Pieces,...,YearsSinceExit,PriceChange,ResaleDemand,AppreciationTrend,SizeCategory,Exclusivity,ThemePopularity,InvestmentScore,AnnualPriceIncrease,AnnualPercentageIncrease
2511,31025,10280,2021,Normal,Icons,Botanical Collection,Flower Bouquet,10280-1,59.99,756.0,...,0,-36.739457,1.267535,0.0,Medium,Exclusive,0.0,-4.695783,0.0,0.0
2512,30970,10281,2021,Normal,Icons,Botanical Collection,Bonsai Tree,10281-1,49.99,878.0,...,0,-28.685737,1.272305,0.0,Medium,Exclusive,0.0,-1.474295,0.0,0.0
2522,31845,10294,2021,Normal,Icons,Miscellaneous,Titanic,10294-1,679.99,9090.0,...,0,-23.810644,1.047579,0.0,Large,Exclusive,0.0,0.475742,0.0,0.0
2523,31389,10295,2021,Normal,Icons,Vehicles,Porsche 911,10295-1,169.99,1458.0,...,0,-24.236720,1.288931,0.0,Large,Exclusive,0.0,0.305312,0.0,0.0
2524,32116,10297,2022,Normal,Icons,Modular Buildings Collection,Boutique Hotel,10297-1,229.99,3066.0,...,0,-24.953259,1.155056,0.0,Large,Exclusive,0.0,0.018696,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9048,50380,6535736,2024,Extended,Technic,Promotional,McLaren P1 Logo,6535736-1,0.00,178.0,...,0,0.000000,0.000000,0.0,Small,Regular,0.0,0.000000,0.0,0.0
9049,50392,6541140,2024,Extended,Harry Potter,Promotional,Platform 9 3/4,6541140-1,0.00,103.0,...,0,,0.000000,0.0,Small,Exclusive,0.0,,0.0,0.0
9050,50408,6545695,2024,Extended,Harry Potter,Promotional,Hogwarts Express,6545695-1,0.00,42.0,...,0,0.000000,0.000000,0.0,Small,Exclusive,0.0,10.000000,0.0,0.0
9051,50970,6562113,2025,Extended,City,Promotional,Soap Box Racer M&T,LBR2502-1,0.00,39.0,...,0,,0.000000,0.0,Small,Regular,0.0,,0.0,0.0
