In [None]:

# How to load a full folder of images into a DataFrame


import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Path to folder containing images
folder_path = r"C:\Users\YourUsername\Documents\Images"  # Change this to your actual folder path

# Size (width, height) every image is resized to before flattening
RESIZE_TO = (64, 64)

# Get list of image files.
# - The extension check is case-insensitive so "photo.JPG" is not silently dropped.
# - The list is sorted because os.listdir returns entries in arbitrary order, which
#   would make the seeded train/test split below differ between machines/runs.
image_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Create lists to store flattened pixel arrays and their labels
image_data = []
image_labels = []

# Read each image, convert to a flat NumPy array, and store it with its label
for file in image_files:
    img_path = os.path.join(folder_path, file)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Read in grayscale (optional)
    if img is None:
        # cv2.imread does not raise on a corrupt/unreadable file; it returns None,
        # which would make cv2.resize fail with an opaque error. Skip such files.
        print(f"Skipping unreadable image: {img_path}")
        continue
    img = cv2.resize(img, RESIZE_TO)  # Resize all images to the same size
    image_data.append(img.flatten())  # Flatten image to a 1D array
    image_labels.append(file.split('_')[0])  # Example: Extract label from filename (Modify if needed)

# Fail early with a clear message instead of an obscure error from train_test_split
if not image_data:
    raise ValueError(f"No readable images found in {folder_path}")

# Convert to DataFrame: one row per image, one column per pixel, plus the label column
df = pd.DataFrame(image_data)
df['Label'] = image_labels  # Add labels (Modify if labels are stored separately)

# Split into features and labels ('Label' is the last column)
X = df.iloc[:, :-1].values  # Features (image pixels)
y = df.iloc[:, -1].values   # Labels

# Hold out 20% of the images for testing; fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display shapes of datasets
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


In [None]:
# Correct Approach (Train, Validation, and Test Split)

import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Folder locations
train_val_dir = r"C:\Users\YourUsername\Documents\dataset\train_val"  # Contains train+val images
test_dir = r"C:\Users\YourUsername\Documents\dataset\test"  # Separate folder for test images

# Loader settings shared by all three datasets so they cannot drift apart
IMAGE_SIZE = (32, 32)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2  # 80% train / 20% validation
SPLIT_SEED = 42         # must be the same for both subsets so they do not overlap

# Load training + validation dataset (80% train, 20% validation)
train_dataset = image_dataset_from_directory(
    train_val_dir,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

val_dataset = image_dataset_from_directory(
    train_val_dir,
    validation_split=VALIDATION_SPLIT,  # Same split as train dataset
    subset="validation",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Load test dataset separately (NO validation split here).
# shuffle=False keeps a fixed order: with the default shuffle=True the dataset is
# reshuffled on every pass, so predictions from one pass cannot be matched to
# labels collected in another (e.g. for a confusion matrix).
test_dataset = image_dataset_from_directory(
    test_dir,  # Only test images
    shuffle=False,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Normalize images: multiply every pixel value by 1/255
normalization_layer = tf.keras.layers.Rescaling(1./255)


def _normalize(images, labels):
    """Rescale a batch of images and pass its labels through unchanged."""
    return normalization_layer(images), labels


train_dataset = train_dataset.map(_normalize)
val_dataset = val_dataset.map(_normalize)
test_dataset = test_dataset.map(_normalize)
# 🚀 Summary:
# Dataset    | Source Folder | How it's Split
# -----------|---------------|--------------------------------
# Train      | train_val     | 80% of train_val folder
# Validation | train_val     | 20% of train_val folder
# Test       | test          | No split, separate test dataset

In [None]:
# ✅ Method 1: Random Sampling from the Dataset


import pandas as pd
from sklearn.model_selection import train_test_split

# Read the full dataset from disk
df = pd.read_csv("your_dataset.csv")

# Keep a random 30% of the rows (seeded so the same rows are drawn each run)
subset_df = df.sample(frac=0.3, random_state=42)

# Every column except the last is treated as a feature; the last one as the target
X, y = subset_df.iloc[:, :-1], subset_df.iloc[:, -1]

# 80% of the subset for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in the subset and in each split
print(f"Subset Size: {len(subset_df)}, Train Size: {len(X_train)}, Test Size: {len(X_test)}")


In [None]:
# What Are Frozen Layers in Deep Learning?
# 🔹 Frozen layers are layers in a neural network whose weights do not update during training.
# 🔹 This is commonly used in transfer learning, where we use a pre-trained model and only train some layers while keeping others fixed (frozen).

# Why Freeze Layers?
# ✅ To retain pre-learned features from a large dataset (e.g., ImageNet).
# ✅ To reduce training time, since fewer parameters are updated.
# ✅ To avoid overfitting, especially when the dataset is small.

# How to Freeze Layers?
# In TensorFlow/Keras, you can freeze a layer by setting:
#
#     layer.trainable = False
#
# This ensures that the weights of that layer will not change during training.

# Example: Freezing Layers in Transfer Learning

from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Pre-trained VGG16 convolutional base (ImageNet weights, no classification head)
base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

# Mark every layer of the base as non-trainable so its weights stay fixed
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New classification head stacked on top of the frozen base
head_layers = [
    Flatten(),
    Dense(128, activation="relu"),
    Dense(10, activation="softmax"),  # Output layer for classification
]
model = Sequential([base_model, *head_layers])

# Count trainable parameters -- only the new head layers should contribute
trainable_param_count = sum(np.prod(var.shape) for var in model.trainable_weights)
print("Trainable Parameters:", trainable_param_count)

# Compile the model
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# When to Freeze and Unfreeze Layers?
# 1️⃣ When dataset is small → Freeze most layers, train only the last few.
# 2️⃣ When dataset is large & similar → Unfreeze more layers for fine-tuning.
# 3️⃣ Fine-tuning → First train with frozen layers, then unfreeze some layers and retrain with a low learning rate.

# 📌 Summary
# Action                           | Effect
# ---------------------------------|--------------------------------------------
# Freeze Layers (trainable=False)  | Weights remain unchanged during training.
# Unfreeze Layers (trainable=True) | Weights are updated during training.
# Used in Transfer Learning?       | ✅ Yes, for feature reuse & faster training.

A complete step-by-step guide to adding files and folders to Jupyter Notebook (in Anaconda). It covers:

Adding files and folders in Jupyter

Uploading ZIP files and extracting

Handling different file types (CSV, images, text, etc.)

Creating a folder structure

Splitting datasets (e.g., into training/testing)

🔧 1. Launching Jupyter Notebook from Anaconda
Step 1:

Open Anaconda Navigator.

Launch Jupyter Notebook.

A browser window will open at http://localhost:8888/tree.

📁 2. Creating Folders in Jupyter Notebook
Steps:

Click on the "New" dropdown (top-right).

Select "Folder" → A new folder appears named Untitled Folder.

Rename it by selecting the checkbox → Rename.

Example folder structure to follow:

```text
project/
│
├── data/
│   ├── raw/
│   ├── processed/
│
├── models/
├── notebooks/
├── utils/
```
📄 3. Uploading Files (CSV, TXT, Images, etc.)
Steps:

Click Upload (top-right).

Browse and select files (e.g., data.csv, image.jpg, text.txt).

Click the Upload button next to the filename.

📦 4. Uploading and Extracting ZIP Files
Steps:

Upload the .zip file via the Upload button.

In a notebook, run:

Python code:
import zipfile

# Archive to unpack and the directory that receives its contents
zip_path = "your_file.zip"
extract_path = "data/"

# Open the archive read-only and unpack every member into extract_path
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)
✅ Extracts everything to the data/ folder.

📂 5. Reading Different File Types in Jupyter
a) CSV File
Python code:
import pandas as pd

# Load the raw CSV into a DataFrame and print its first rows as a quick sanity check
csv_path = 'data/raw/data.csv'
df = pd.read_csv(csv_path)
print(df.head())
b) Excel File
Python code:
# Load the Excel workbook into a DataFrame
excel_path = 'data/raw/data.xlsx'
df = pd.read_excel(excel_path)
c) Image File (for CV Projects)
Python code:
from PIL import Image
import matplotlib.pyplot as plt

# Open the image with Pillow and render it with matplotlib, hiding the axes
image_path = 'data/raw/image.jpg'
img = Image.open(image_path)

plt.imshow(img)
plt.axis('off')  # no ticks or frame around the picture
plt.show()
d) Text File
Python code:
# Read the whole text file into memory; the file is closed when the block exits
with open('data/raw/sample.txt', 'r') as text_file:
    content = text_file.read()

# Show what was read
print(content)
✂️ 6. Splitting Data into Train/Test Folders (Image or Text)
For Image Classification Projects:
Python code:
import os
import shutil
from sklearn.model_selection import train_test_split

# Set paths
source_folder = "data/raw_images/"
train_folder = "data/train/"
test_folder = "data/test/"

# Create destination folders if they do not exist
for folder in [train_folder, test_folder]:
    os.makedirs(folder, exist_ok=True)

# Get list of images.
# - Only regular files are kept: a sub-directory in the listing would make
#   shutil.copy fail.
# - The list is sorted because os.listdir returns entries in arbitrary order;
#   without a stable order the seeded split below is not reproducible.
images = sorted(
    name for name in os.listdir(source_folder)
    if os.path.isfile(os.path.join(source_folder, name))
)

# 80% train / 20% test, fixed seed for a repeatable split
train_imgs, test_imgs = train_test_split(images, test_size=0.2, random_state=42)

# Copy files (the originals stay in source_folder)
for destination, names in ((train_folder, train_imgs), (test_folder, test_imgs)):
    for img in names:
        shutil.copy(os.path.join(source_folder, img), destination)
For CSV Data (ML Projects):
Python code:
from sklearn.model_selection import train_test_split
import pandas as pd

import os  # local to this snippet: needed to create the output folder below

# Load the raw dataset
df = pd.read_csv('data/raw/data.csv')

# Split: 80% train / 20% test, fixed seed for a repeatable split
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs('data/processed', exist_ok=True)

# Save (index=False keeps the row index out of the CSV files)
train.to_csv('data/processed/train.csv', index=False)
test.to_csv('data/processed/test.csv', index=False)
