# 1. Loading data and importing modules

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import joblib

In [15]:
# Read the fabric dataset from disk into the notebook-wide DataFrame `df`.
df = pd.read_csv('datasets/fabric_full_smart_dataset_5000.csv')

# Report the schema: a header line followed by the column names as a plain list.
print("Dataset Loaded. Columns detected:", list(df.columns), sep="\n")

# Last expression of the cell, so the first 10 rows are rendered as a table.
df.head(10)

Dataset Loaded. Columns detected:
['Fabric_Type', 'Weight_gsm', 'Breathability', 'Insulation', 'Tensile_Strength', 'Moisture_Absorption', 'Recyclability', 'Biodegradability', 'Cost_Rs_per_meter', 'Terrain', 'Performance_Score']


Unnamed: 0,Fabric_Type,Weight_gsm,Breathability,Insulation,Tensile_Strength,Moisture_Absorption,Recyclability,Biodegradability,Cost_Rs_per_meter,Terrain,Performance_Score
0,Rayon,334.893756,2.650913,8.017219,6.371651,5.012495,1.899774,5.13324,533.877486,Jungle,52.38
1,Spandex,288.284311,1.507704,7.497989,9.446974,1.007009,9.929904,6.557334,936.897083,Cold Desert,83.86
2,Blended Fabric A,173.193325,6.506676,2.255445,3.629302,4.297257,5.10463,8.066584,339.526984,Humid Coast,48.97
3,Wool,269.572662,1.418054,6.467904,2.534717,1.585464,9.53997,9.690688,1222.176155,Cold Desert,64.93
4,Blended Fabric B,111.255076,7.158097,4.961372,2.098344,5.456592,1.309497,9.183884,425.230973,Jungle,43.75
5,Wool,179.747544,5.680612,5.920393,2.66369,9.726262,7.976195,9.45549,1347.499658,Hot Desert,51.62
6,Spandex,262.542072,5.687508,9.650548,8.600805,7.725881,5.857229,6.28076,1449.620196,Jungle,55.92
7,Blended Fabric A,174.807522,2.487402,1.140728,4.810613,4.553934,3.641394,1.126718,338.321486,Jungle,42.33
8,Nylon,332.856173,6.45364,9.336708,6.859693,9.234637,8.650347,5.045056,188.344669,Humid Coast,45.88
9,Rayon,294.029201,6.993301,6.32168,3.472496,6.051191,4.446342,9.745409,1280.925045,Cold Desert,46.14


# 2. Preprocessing

In [16]:
# Turn the two text columns into integer codes the model can consume.
# LabelEncoder numbers the sorted unique labels 0..n-1 (integer codes, not a
# binary/one-hot encoding); e.g. 'Cold Desert' -> 0 and 'Wool' -> 10 in the
# preview below.

# Fabric name -> integer code. The fitted encoder is kept so that the same
# mapping can be applied to new inputs at prediction time.
le_fabric = LabelEncoder().fit(df['Fabric_Type'])
df['Fabric_Encoded'] = le_fabric.transform(df['Fabric_Type'])

# Terrain name -> integer code, with its own independent encoder.
le_terrain = LabelEncoder().fit(df['Terrain'])
df['Terrain_Encoded'] = le_terrain.transform(df['Terrain'])

# Preview the frame with the two new *_Encoded columns appended.
df.head(5)

Unnamed: 0,Fabric_Type,Weight_gsm,Breathability,Insulation,Tensile_Strength,Moisture_Absorption,Recyclability,Biodegradability,Cost_Rs_per_meter,Terrain,Performance_Score,Fabric_Encoded,Terrain_Encoded
0,Rayon,334.893756,2.650913,8.017219,6.371651,5.012495,1.899774,5.13324,533.877486,Jungle,52.38,8,3
1,Spandex,288.284311,1.507704,7.497989,9.446974,1.007009,9.929904,6.557334,936.897083,Cold Desert,83.86,9,0
2,Blended Fabric A,173.193325,6.506676,2.255445,3.629302,4.297257,5.10463,8.066584,339.526984,Humid Coast,48.97,1,2
3,Wool,269.572662,1.418054,6.467904,2.534717,1.585464,9.53997,9.690688,1222.176155,Cold Desert,64.93,10,0
4,Blended Fabric B,111.255076,7.158097,4.961372,2.098344,5.456592,1.309497,9.183884,425.230973,Jungle,43.75,2,3


# 3. Select Features (The "Inputs")

In [17]:
# Model inputs: the measured fabric properties plus the encoded terrain and
# fabric identity. The order of this list is the column order the model is
# trained on.
feature_columns = [
    # physical fabric properties
    'Weight_gsm',
    'Breathability',
    'Insulation',
    'Tensile_Strength',
    'Moisture_Absorption',
    # categorical context, already label-encoded in the preprocessing cell
    'Terrain_Encoded',
    'Fabric_Encoded',
]

# Feature matrix (inputs) and target vector (the score we want to predict).
X = df.loc[:, feature_columns]
y = df.loc[:, 'Performance_Score']

# 4. Train/test split

In [18]:
# Hold out part of the rows so the model can be scored on data it never saw.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # 20% of the rows go to the test split
    random_state=42,  # fixed seed -> the same split on every run
)

# 5. Train the Model

In [19]:
# Announce the step, then build and fit a 100-tree random forest on the
# training split. The seed is fixed so repeated runs are reproducible.
print("\nTraining the Fabric Performance Model...")
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Bare name as the last expression so the notebook renders the fitted estimator.
model


Training the Fabric Performance Model...


0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


# 6. Evaluate

In [20]:
# Score the fitted model on the held-out split. For scikit-learn regressors
# `score` reports the coefficient of determination (R^2); 1.0 is a perfect fit.
r2_test = model.score(X_test, y_test)
r2_test

0.9641462433297786

In [23]:
# Inference demo: predict the performance score of one hypothetical fabric.
#
# The cell prefers the in-memory `model`, `le_terrain` and `le_fabric` created
# earlier in the notebook. If one of them is missing (e.g. this cell is run in a
# fresh kernel) it is loaded from disk instead. The imports are repeated here on
# purpose: in that fresh-kernel scenario the notebook's import cell has not run
# either, so `pd` and `joblib` would otherwise be undefined.
from pathlib import Path

import joblib
import pandas as pd


def _load_first_existing(candidate_paths):
    """Load and return the joblib artifact at the first candidate path that exists.

    Parameters
    ----------
    candidate_paths : list of str
        File paths to probe, in order of preference.

    Returns
    -------
    object
        The object stored in the first file that exists.

    Raises
    ------
    FileNotFoundError
        If none of the candidate paths points to an existing file.
    """
    for candidate in candidate_paths:
        if Path(candidate).is_file():
            # joblib.load unpickles the file, which can execute arbitrary code:
            # only load artifacts that you produced yourself.
            return joblib.load(candidate)
    raise FileNotFoundError(
        f"No saved artifact found; looked for: {', '.join(candidate_paths)}"
    )


# Adjust the fallback paths below to where you actually saved the artifacts.
try:
    model  # reference existing in-memory model
except NameError:
    # 'pkl_file/fabric_model.pkl' is the previously configured location;
    # 'Frabic_model.pkl' is the file name this notebook's save cell writes.
    model = _load_first_existing(['pkl_file/fabric_model.pkl', 'Frabic_model.pkl'])

try:
    le_terrain
except NameError:
    le_terrain = _load_first_existing(['terrain_encoder.pkl'])

try:
    le_fabric
except NameError:
    le_fabric = _load_first_existing(['fabric_encoder.pkl'])

# Let's simulate a scenario:
# "How well does heavy Wool perform in a Hot Desert?" (Should be bad)

# 1. Prepare the input data: a single row, one length-1 list per column.
#    Keys are listed in the same order as the columns the model was trained on.
test_fabric = {
    'Weight_gsm': [300],          # Heavy
    'Breathability': [1],         # Poor breathability
    'Insulation': [8],            # High heat retention
    'Tensile_Strength': [10],
    'Moisture_Absorption': [5],
    # Categorical inputs must go through the same encoders used for training.
    'Terrain_Encoded': [le_terrain.transform(['Hot Desert'])[0]],  # The crucial context
    'Fabric_Encoded': [le_fabric.transform(['Wool'])[0]],
}
test_df = pd.DataFrame(test_fabric)

# 2. Predict
prediction = model.predict(test_df)
print(f"Predicted Performance Score for Wool in Hot Desert: {prediction[0]:.2f}")

Predicted Performance Score for Wool in Hot Desert: 34.90


In [22]:
# Persist everything needed to make predictions in a later session: the trained
# model AND the two label encoders. Without the encoders, the text labels
# ('Wool', 'Hot Desert', ...) cannot be mapped to the integer codes the model
# was trained on. The encoder file names match the ones the prediction cell
# falls back to when `le_terrain` / `le_fabric` are not in memory.
# NOTE(review): "Frabic" looks like a typo for "Fabric"; the file name is kept
# unchanged so anything already loading this file keeps working - confirm
# before renaming.
joblib.dump(model, "Frabic_model.pkl")
joblib.dump(le_terrain, "terrain_encoder.pkl")
joblib.dump(le_fabric, "fabric_encoder.pkl")
print("Model saved successfully!")


Model saved successfully!


# Testing