In [110]:
import pandas as pd

df = pd.read_csv('cleaned_diamond_data.csv').rename(columns = {'Unnamed: 0' : 'id'})
df.drop('id',inplace=True,axis=1)
df.head(5)

Unnamed: 0,carat,clarity,color,cut,x dimension,y dimension,z dimension,depth,table,price
0,0.5,IF,D,IDEAL,5.1,5.15,3.2,61.5,56.0,3000.0
1,0.7,VVS2,E,PREMIUM,5.7,5.49,3.52,62.0,59.0,4500.0
2,0.5,SI2,H,GOOD,4.3,4.31,3.9,62.3,56.0,700.0
3,1.2,IF,D,IDEAL,5.9,6.82,4.2,61.7,58.0,10000.0
4,0.9,I1,J,FAIR,6.0,5.49,3.7,61.7,56.0,2400.0


In [111]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
cols = df.columns.tolist()

for column in cols:
    if df[column].dtype == 'object':
        df[column] = le.fit_transform(df[column])

In [112]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error

# Podział na zbiór treningowy i testowy
y = df['price']
X = df.drop(columns=['price'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Budowa modelu regresji
model = LinearRegression()

# Eliminacja wsteczna - RFE (Recursive Feature Elimination)
selector = RFE(model, step=1)
selector = selector.fit(X_train, y_train)

# Wybrane istotne zmienne
selected_features = X_train.columns[selector.support_]
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Dopasowanie modelu na wybranych zmiennych
model.fit(X_train_selected, y_train)

# Predykcja na zbiorze testowym
y_pred = model.predict(X_test_selected)

# Ocena modelu
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')

print("Współczynniki modelu:")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef}")


MSE: 784508574.852771
Współczynniki modelu:
carat: -12411.479380750197
clarity: 28065.34525043358
color: -22429.359541537164
cut: 3561.32158762409


In [113]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd


# Inicjalizacja aplikacji Dash
app = dash.Dash(__name__)

# Opcje dla kontrolki wyboru zmiennej
variable_options = ['carat', 'clarity', 'color', 'cut', 'x dimension', 'y dimension', 'z dimension', 'depth', 'table']

# Układ strony
app.layout = html.Div(children=[
    html.H1(children='Diamond Data Dashboard'),

    # Kontrolka wyboru zmiennej
    dcc.Dropdown(
        id='variable-dropdown',
        options=[{'label': variable, 'value': variable} for variable in variable_options],
        value='carat',
        multi=False,
        style={'width': '50%'},
        clearable=False
    ),

    # Wizualizacja rozkładu wybranej zmiennej
    dcc.Graph(id='distribution-plot'),

    # Tabela z próbką danych
    html.Div([
        html.H3('Próbka danych'),
        dash_table.DataTable(
            id='sample-data-table',
            columns=[{'name': col, 'id': col} for col in df.columns],
            data=df.sample(10).to_dict('records')
        )
    ])
])

# Funkcja do aktualizacji wykresu i tabeli na podstawie wybranej zmiennej
@app.callback(
    [Output('distribution-plot', 'figure'),
     Output('sample-data-table', 'data')],
    [Input('variable-dropdown', 'value')]
)
def update_plots(selected_variable):
    # Aktualizacja wykresu
    fig = px.histogram(df, x=selected_variable, nbins=50, title=f'Rozkład zmiennej "{selected_variable}"')

    # Aktualizacja tabeli
    sample_data = df.sample(10).to_dict('records')

    return fig, sample_data

# Uruchomienie aplikacji
if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False)
