<a href="https://colab.research.google.com/github/mohammadreza-mohammadi94/Deep-Learning-Projects/blob/main/Fuit_Price_FAOSTAT/friut_price_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries & Setup Enviorment

In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import requests
import logging
import zipfile
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Setup logger
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[
        logging.FileHandler('fruit_price_log.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Define Functions

*Download FAOSTAT*

In [13]:
def download_faostat_data():
    url = "https://bulks-faostat.fao.org/production/Prices_E_All_Data_(Normalized).zip"
    dataset_path = "faostat_prices_normalized.csv"
    zip_path = "Prices_E_All_Data_(Normalized).zip"
    target_csv = "Prices_E_All_Data_(Normalized).csv"

    if not os.path.exists(dataset_path):
        logger.info("Downloading FAOSTAT Producer Prices data...")
        try:
            response = requests.get(url, stream=True)
            with open(zip_path, 'wb') as f:
                f.write(response.content)
            logger.info("ZIP file downloaded.")

            # استخراج فایل CSV خاص از ZIP
            with zipfile.ZipFile(zip_path, 'r') as z:
                # بررسی وجود فایل مورد نظر
                if target_csv not in z.namelist():
                    logger.error(f"Target CSV file {target_csv} not found in ZIP archive. Available files: {z.namelist()}")
                    raise FileNotFoundError(f"Target CSV file {target_csv} not found in ZIP archive.")
                z.extract(target_csv)
                os.rename(target_csv, dataset_path)
            logger.info(f"CSV file extracted and saved as {dataset_path}")
        except Exception as e:
            logger.error(f"Failed to download or extract FAOSTAT data: {e}")
            raise
    else:
        logger.info("FAOSTAT data already exists.")
    return dataset_path

*Check Available Items & Area*

In [14]:
def log_available_items_and_areas(dataset_path):
    logger.info("Logging available items and areas...")
    try:
        df = pd.read_csv(dataset_path, encoding="latin1")
        unique_items = df['Item'].unique()
        unique_areas = df['Area'].unique()
        logger.info(f"Available Items: {unique_items[:20]}")  # فقط 20 مورد اول برای خلاصه بودن
        logger.info(f"Available Areas: {unique_areas[:20]}")
    except Exception as e:
        logger.error(f"Error logging available items and areas: {e}")
        raise

*Load Data*

In [15]:
def load_fruit_price_data(dataset_path, fruit="Apples",
                          country='United States Of America'):
    logger.info(f"Loading Price Data for {fruit} in {country}")
    try:
        df = pd.read_csv(dataset_path, encoding="latin1")
        # Filtering based on fruit and country
        df = df[(df['Item'] == fruit) & (df['Area'] == country)]
        if df.empty:
            logger.error(f"No Data Found For {fruit} in {country}")
            raise ValueError(f"No Data Found For {fruit} in {country}")

        # Select columns and convert datetime
        df = df[['Year', 'Value']].rename(
             columns={"Year": 'Date', 'Value': f'{fruit}_Price'})
        df["Date"] = pd.to_datetime(df["Date"], format="%Y")
        df = df.sort_values('Date')

        logger.info(f"Loaded {len(df)} Records fr {fruit} in {country}")
        return df
    except Exception as e:
        logger.error(f"Error Loading Data {e}")
        raise

*Preprocess Data*

In [20]:
def preprocess_data(df, fruit, sequence_length=5, normalize=True):
    logger.info("Preprocessing data...")
    try:
        prices = df[f'{fruit}_Price'].values.reshape(-1, 1)

        # check price range
        price_min, price_max = prices.min(), prices.max()
        logger.info(f"Price range before normalization: min={price_min:.2f}, max={price_max:.2f}")

        # normalization (optional)
        if normalize:
            scaler = MinMaxScaler()
            prices_scaled = scaler.fit_transform(prices)
        else:
            prices_scaled = prices
            scaler = None

        # creating sequences
        X, y = [], []
        for i in range(len(prices_scaled) - sequence_length):
            X.append(prices_scaled[i:i + sequence_length])
            y.append(prices_scaled[i + sequence_length])
        X = np.array(X)
        y = np.array(y)

        logger.info(f"Data preprocessed: X shape={X.shape}, y shape={y.shape}")
        return X, y, scaler
    except Exception as e:
        logger.error(f"Error in preprocessing data: {e}")
        raise


*Build LSTM Model*

In [22]:
def build_lstm_model(sequence_length):
    logger.info("Building LSTM Model...")
    try:
        model = Sequential([
            LSTM(64, input_shape=(sequence_length, 1), return_sequences=True),
            Dropout(0.2),
            LSTM(32),
            Dropout(0.2),
            Dense(16, activation='relu'),
            Dense(1, activation='linear')
        ])
        model.compile(optimizer='adam',
                    loss='mse')
        logger.info("LSTM model build successfully.")
        return model
    except Exception as e:
        logger.error(f"Error building model: {e}")
        raise

*Main*

In [23]:
logger.info("Starting fruit price prediction project...")
# Load data & Define params
dataset_path = download_faostat_data()
FRUIT = "Bananas"
COUNTRY = "Cuba"
df = load_fruit_price_data(dataset_path, fruit=FRUIT, country=COUNTRY)

# Log values
log_available_items_and_areas(dataset_path)

# Preprocessing
SEQUENCE_LENGTH = 5
X, y, scaler = preprocess_data(df, fruit=FRUIT,
                               sequence_length=SEQUENCE_LENGTH, normalize=False)

# trian/test split
logger.info("Splitting data into train and test sets...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
logger.info(f"Train set: X={X_train.shape}, y={y_train.shape}")
logger.info(f"Test set: X={X_test.shape}, y={y_test.shape}")

# Building and training the model
model = build_lstm_model(SEQUENCE_LENGTH)
model.summary()
logger.info("Training LSTM model...")
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))
logger.info("Training completed.")

# Evaluate the model performance
logger.info("Evaluating model...")
loss = model.evaluate(X_test, y_test)
logger.info(f"Test loss: {loss}")

# Save the model
model.save("fruit_price_lstm_model.h5")
logger.info("Model saved as fruit_price_lstm_model.h5")

  super().__init__(**kwargs)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 205ms/step - loss: 1090573.1250 - val_loss: 912606.5625
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 963741.6250 - val_loss: 912351.9375
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 1447673.7500 - val_loss: 912078.4375
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 1142692.7500 - val_loss: 911790.8750
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 1145980.2500 - val_loss: 911463.8750
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 1115805.7500 - val_loss: 911101.6875
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 1322303.2500 - val_loss: 910693.4375
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 1212546.1250 

