##   Finetune LLM-Interpret Heat Equation Solution
#### Github code and Documentation:

-    https://github.com/sajibhalder/FineTune-LLM-Heat-Equation-Solution

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Sample_submission.csv
/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case3.vtk
/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case4.vtk
/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case1.vtk
/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case2.vtk
/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Questions.csv


## 1. Dataset Generation for Heat Equation Solution- Case1, Case2, Case3, Case4
The time dependent heat equation is given as
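## $\frac{\partial T}{\partial t} - \alpha\left(\frac{\partial^2}{\partial x^2} + \frac{\partial^2}{\partial y^2}\right) T = f(x, y, t)$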
where

- $T$ : Temperature field
- $\alpha$ : Thermal diffusivity
- $f$ : Force function

Details of the solution for the different cases on the unit square [0,1] × [0,1]:

## Case1 : ∂²T/∂x² + ∂²T/∂y² = 8π²sin(2πx)sin(2πy)
content_copy
Boundary Conditions

T(0,y) = 0
T(1,y) = 0
T(x,0) = 0
T(x,1) = 0

## Case2 : ∂²T/∂x² + ∂²T/∂y² = 0  
content_copy
Initial and Boundary Conditions

T(0,y) = 0
T(1,y) = 0
T(x,0) = 0
T(x,1) = sin(πx)

## Case3 : ∂²T/∂x² + ∂²T/∂y² = 0
content_copy
Boundary Conditions

T(0,y) = 0
T(1,y) = y(1-y)
T(x,0) = 0
T(x,1) = 0

## Case4 : ∂T/∂t = α(∂²T/∂x² + ∂²T/∂y²)
content_copy
Initial and Boundary Conditions

T(x,y,0) = sin(πx)sin(πy)
T(0,y,t) = 0
T(1,y,t) = 0
T(x,0,t) = 0
T(x,1,t) = 0
α = 0.01

In [2]:
import numpy as np
import pandas as pd

# Parameters
L = 1.0  # Length of the domain
N = 40  # Number of grid points per axis (adjusted for max 10k rows dataset size)
alpha = 0.01  # Diffusion coefficient multiplying the Laplacian
T_max = 1.0  # Maximum time
dt = 0.001  # Time step
# N points spanning [0, L] are separated by L / (N - 1); this must match the
# spacing of the linspace grids below or the finite differences are mis-scaled.
dx = dy = L / (N - 1)  # Grid spacing

# Discretized spatial domain
x = np.linspace(0, L, N)
y = np.linspace(0, L, N)
X, Y = np.meshgrid(x, y)

# Initialize temperature field
T = np.zeros((N, N))

# Define the source term f(x, y)
def source_term(x, y):
    """Return the Case 1 forcing 8*pi^2 * sin(2*pi*x) * sin(2*pi*y).

    Accepts scalars or NumPy arrays; array inputs follow NumPy broadcasting.
    """
    return 8 * np.pi**2 * np.sin(2 * np.pi * x) * np.sin(2 * np.pi * y)

# Time loop for solving the heat equation
def solve_heat_equation_case_1(T, alpha, dt, dx, dy, T_max, max_rows=10000, x=None, y=None):
    """Explicit finite-difference march for Case 1, returned as a point table.

    Parameters
    ----------
    T : 2-D array-like
        Initial temperature field, indexed ``T[i, j]`` <-> ``(x[i], y[j])``.
        It is copied; the caller's array is left untouched.
    alpha, dt, dx, dy : float
        Diffusion coefficient, time step and grid spacings.
    T_max : float
        Final time; ``int(T_max / dt)`` steps are attempted.
    max_rows : int
        Upper bound on the number of rows in the returned frame.
    x, y : 1-D array-like, optional
        Node coordinates along each axis. Default to an evenly spaced unit
        interval matching ``T``'s shape (the problem is posed on [0,1] x [0,1]).

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, z, Temperature``. ``z`` is 0.0 (planar domain) and
        ``Temperature`` holds the field value. Snapshots of successive time
        steps are stacked in order; the four-column schema has no time column.
    """
    T = np.array(T, dtype=float)  # private copy
    n_x, n_y = T.shape
    x = np.linspace(0.0, 1.0, n_x) if x is None else np.asarray(x, dtype=float)
    y = np.linspace(0.0, 1.0, n_y) if y is None else np.asarray(y, dtype=float)
    num_time_steps = int(T_max / dt)

    # Homogeneous Dirichlet boundary on all four edges.
    T[0, :] = 0.0
    T[-1, :] = 0.0
    T[:, 0] = 0.0
    T[:, -1] = 0.0

    # The forcing is time independent, so evaluate it once on the interior nodes.
    # NOTE(review): the Case 1 statement reads lap(T) = f, whereas the steady
    # state of this update satisfies lap(T) = -f; confirm the intended sign.
    forcing = source_term(x[1:-1, None], y[None, 1:-1])

    # Coordinates in the same (i outer, j inner) order as T.ravel().
    xs = np.repeat(x, n_y)
    ys = np.tile(y, n_x)
    zs = np.zeros(n_x * n_y)

    snapshots = []
    rows_collected = 0
    for _ in range(num_time_steps):
        if rows_collected >= max_rows:
            break
        core = T[1:-1, 1:-1]
        laplacian = (
            (T[2:, 1:-1] - 2 * core + T[:-2, 1:-1]) / dx**2
            + (T[1:-1, 2:] - 2 * core + T[1:-1, :-2]) / dy**2
        )
        # The right-hand side is fully evaluated before assignment, so every
        # node is advanced from the previous time level.
        T[1:-1, 1:-1] = core + alpha * dt * (laplacian + forcing)
        snapshots.append(np.column_stack((xs, ys, zs, T.ravel())))
        rows_collected += xs.size

    table = np.vstack(snapshots)[:max(max_rows, 0)] if snapshots else np.empty((0, 4))
    return pd.DataFrame(table, columns=['x', 'y', 'z', 'Temperature'])

# Run the Case 1 solver on the grid defined above and persist the table as CSV.
csv_file_path = "heat_eq_case_1.csv"
dataset_case_1 = solve_heat_equation_case_1(T, alpha, dt, dx, dy, T_max)
dataset_case_1.to_csv(csv_file_path, index=False)

In [3]:
import numpy as np
import pandas as pd

# Parameters
L = 1.0  # Length of the domain
N = 40  # Number of grid points per axis (adjusted for max 10k rows dataset size)
alpha = 0.01  # Diffusion coefficient; sets how quickly the explicit march evolves
T_max = 1.0  # Maximum time
dt = 0.001  # Time step
# N points spanning [0, L] are separated by L / (N - 1); this must match the
# spacing of the linspace grids below or the finite differences are mis-scaled.
dx = dy = L / (N - 1)  # Grid spacing

# Discretized spatial domain
x = np.linspace(0, L, N)
y = np.linspace(0, L, N)
X, Y = np.meshgrid(x, y)

# Initialize temperature field
T = np.zeros((N, N))

# Boundary conditions for Case 2
def boundary_conditions(x, y):
    """Return the Case 2 starting value at the scalar point (x, y).

    T(0,y) = T(1,y) = T(x,0) = 0 and T(x,1) = sin(pi*x). Interior points start
    from 0.0; only boundary values are prescribed for this case.
    """
    # Side walls and the bottom edge are checked first so both top corners are exactly 0.
    if np.isclose(x, 0.0) or np.isclose(x, 1.0) or np.isclose(y, 0.0):
        return 0.0
    if np.isclose(y, 1.0):
        return np.sin(np.pi * x)  # top edge: T(x, 1) = sin(pi*x)
    return 0.0  # interior starting guess

# Time loop for solving the heat equation
def solve_heat_equation_case_2(T, alpha, dt, dx, dy, T_max, max_rows=10000, x=None, y=None):
    """Explicit finite-difference march for Case 2, returned as a point table.

    Parameters
    ----------
    T : 2-D array-like
        Only its shape is used: every node is initialised from
        ``boundary_conditions``. The caller's array is not modified.
    alpha, dt, dx, dy : float
        Diffusion coefficient, time step and grid spacings.
    T_max : float
        Final time; ``int(T_max / dt)`` steps are attempted.
    max_rows : int
        Upper bound on the number of rows in the returned frame.
    x, y : 1-D array-like, optional
        Node coordinates along each axis. Default to an evenly spaced unit
        interval matching ``T``'s shape.

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, z, Temperature``. ``z`` is 0.0 (planar domain) and
        ``Temperature`` holds the field value. Snapshots of successive time
        steps are stacked in order; the four-column schema has no time column.
    """
    T = np.array(T, dtype=float)  # private copy
    n_x, n_y = T.shape
    x = np.linspace(0.0, 1.0, n_x) if x is None else np.asarray(x, dtype=float)
    y = np.linspace(0.0, 1.0, n_y) if y is None else np.asarray(y, dtype=float)
    num_time_steps = int(T_max / dt)

    # Apply initial and boundary conditions, T[i, j] <-> (x[i], y[j]).
    for i in range(n_x):
        for j in range(n_y):
            T[i, j] = boundary_conditions(x[i], y[j])

    # Coordinates in the same (i outer, j inner) order as T.ravel().
    xs = np.repeat(x, n_y)
    ys = np.tile(y, n_x)
    zs = np.zeros(n_x * n_y)

    snapshots = []
    rows_collected = 0
    for _ in range(num_time_steps):
        if rows_collected >= max_rows:
            break
        core = T[1:-1, 1:-1]
        laplacian = (
            (T[2:, 1:-1] - 2 * core + T[:-2, 1:-1]) / dx**2
            + (T[1:-1, 2:] - 2 * core + T[1:-1, :-2]) / dy**2
        )
        # Only interior nodes are advanced, so the boundary values stay fixed.
        T[1:-1, 1:-1] = core + alpha * dt * laplacian
        snapshots.append(np.column_stack((xs, ys, zs, T.ravel())))
        rows_collected += xs.size

    table = np.vstack(snapshots)[:max(max_rows, 0)] if snapshots else np.empty((0, 4))
    return pd.DataFrame(table, columns=['x', 'y', 'z', 'Temperature'])

# Run the Case 2 solver on the grid defined above and persist the table as CSV.
csv_file_path = "heat_eq_case_2.csv"
dataset_case_2 = solve_heat_equation_case_2(T, alpha, dt, dx, dy, T_max)
dataset_case_2.to_csv(csv_file_path, index=False)

In [4]:
import numpy as np
import pandas as pd

# Parameters
L = 1.0  # Length of the domain
N = 40  # Number of grid points per axis (adjusted for max 10k rows dataset size)
alpha = 0.01  # Diffusion coefficient; sets how quickly the explicit march evolves
T_max = 1.0  # Maximum time
dt = 0.001  # Time step
# N points spanning [0, L] are separated by L / (N - 1); this must match the
# spacing of the linspace grids below or the finite differences are mis-scaled.
dx = dy = L / (N - 1)  # Grid spacing

# Discretized spatial domain
x = np.linspace(0, L, N)
y = np.linspace(0, L, N)
X, Y = np.meshgrid(x, y)

# Initialize temperature field
T = np.zeros((N, N))

# Boundary conditions for Case 3
def boundary_conditions(x, y):
    """Return the prescribed Case 3 edge value at (x, y), or None for interior points.

    Left, bottom and top edges are held at 0; the right edge x = 1 follows y(1 - y).
    The x-edges take precedence over the y-edges at the corners.
    """
    on_right_edge = x == 1
    if x == 0 or (not on_right_edge and (y == 0 or y == 1)):
        return 0
    if on_right_edge:
        return y * (1 - y)
    return None

# Stamp the prescribed edge values into T; interior nodes (None) keep their initial value.
for i, x_i in enumerate(x):
    for j, y_j in enumerate(y):
        bc_value = boundary_conditions(x_i, y_j)
        if bc_value is None:
            continue
        T[i, j] = bc_value

# Solve the heat equation using the finite difference method
def solve_heat_equation_case_3(T, alpha, dt, dx, dy, T_max, max_rows=10000, x=None, y=None):
    """Explicit finite-difference march for Case 3, returned as a point table.

    Parameters
    ----------
    T : 2-D array-like
        Initial field with the boundary values already applied, indexed
        ``T[i, j]`` <-> ``(x[i], y[j])``. It is copied, not modified.
    alpha, dt, dx, dy : float
        Diffusion coefficient, time step and grid spacings.
    T_max : float
        Final time; ``int(T_max / dt)`` steps are attempted.
    max_rows : int
        Upper bound on the number of rows in the returned frame.
    x, y : 1-D array-like, optional
        Node coordinates along each axis. Default to an evenly spaced unit
        interval matching ``T``'s shape.

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, z, Temperature``. ``z`` is 0.0 (planar domain) and
        ``Temperature`` holds the field value. Snapshots of successive time
        steps are stacked in order; the four-column schema has no time column.
    """
    T = np.array(T, dtype=float)  # private copy
    n_x, n_y = T.shape
    x = np.linspace(0.0, 1.0, n_x) if x is None else np.asarray(x, dtype=float)
    y = np.linspace(0.0, 1.0, n_y) if y is None else np.asarray(y, dtype=float)
    num_time_steps = int(T_max / dt)

    # Coordinates in the same (i outer, j inner) order as T.ravel().
    xs = np.repeat(x, n_y)
    ys = np.tile(y, n_x)
    zs = np.zeros(n_x * n_y)

    snapshots = []
    rows_collected = 0
    for _ in range(num_time_steps):
        if rows_collected >= max_rows:
            break
        core = T[1:-1, 1:-1]
        laplacian = (
            (T[2:, 1:-1] - 2 * core + T[:-2, 1:-1]) / dx**2
            + (T[1:-1, 2:] - 2 * core + T[1:-1, :-2]) / dy**2
        )
        # Only interior nodes are advanced, so the boundary values stay fixed.
        T[1:-1, 1:-1] = core + alpha * dt * laplacian
        snapshots.append(np.column_stack((xs, ys, zs, T.ravel())))
        rows_collected += xs.size

    table = np.vstack(snapshots)[:max(max_rows, 0)] if snapshots else np.empty((0, 4))
    return pd.DataFrame(table, columns=['x', 'y', 'z', 'Temperature'])

# Run the Case 3 solver on the boundary-initialised grid and persist the table as CSV.
csv_file_path = "heat_eq_case_3.csv"
dataset_case_3 = solve_heat_equation_case_3(T, alpha, dt, dx, dy, T_max)
dataset_case_3.to_csv(csv_file_path, index=False)


In [5]:
import numpy as np
import pandas as pd

# Parameters
L = 1.0  # Length of the domain
N = 40  # Number of grid points per axis (adjusted for max 10k rows dataset size)
alpha = 0.01  # Thermal diffusivity
T_max = 1.0  # Maximum simulation time
dt = 0.001  # Time step
# N points spanning [0, L] are separated by L / (N - 1); this must match the
# spacing of the linspace grids below or the finite differences are mis-scaled.
dx = dy = L / (N - 1)  # Grid spacing

# Discretized spatial domain
x = np.linspace(0, L, N)
y = np.linspace(0, L, N)
X, Y = np.meshgrid(x, y)

# Initialize temperature field with initial condition T(x,y,0) = sin(πx)sin(πy)
# (x and y hold the same values, so this array is symmetric under transposition).
T = np.sin(np.pi * X) * np.sin(np.pi * Y)

# Boundary conditions for Case 4
def apply_boundary_conditions(T):
    """Zero all four edges of ``T`` in place and return the same array."""
    T[0, :] = 0  # T(0,y,t) = 0
    T[-1, :] = 0  # T(1,y,t) = 0
    T[:, 0] = 0  # T(x,0,t) = 0
    T[:, -1] = 0  # T(x,1,t) = 0
    return T

# Solve the heat equation using the finite difference method
def solve_heat_equation_case_4(T, alpha, dt, dx, dy, T_max, max_rows=10000, x=None, y=None):
    """Explicit finite-difference march for Case 4, returned as a point table.

    Parameters
    ----------
    T : 2-D array-like
        Initial temperature field, indexed ``T[i, j]`` <-> ``(x[i], y[j])``.
        It is copied; the caller's array is left untouched.
    alpha, dt, dx, dy : float
        Thermal diffusivity, time step and grid spacings.
    T_max : float
        Final time; ``int(T_max / dt)`` steps are attempted.
    max_rows : int
        Upper bound on the number of rows in the returned frame.
    x, y : 1-D array-like, optional
        Node coordinates along each axis. Default to an evenly spaced unit
        interval matching ``T``'s shape.

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, z, Temperature``. ``z`` is 0.0 (planar domain) and
        ``Temperature`` holds the field value. Snapshots of successive time
        steps are stacked in order; the four-column schema has no time column.
    """
    T = np.array(T, dtype=float)  # private copy
    n_x, n_y = T.shape
    x = np.linspace(0.0, 1.0, n_x) if x is None else np.asarray(x, dtype=float)
    y = np.linspace(0.0, 1.0, n_y) if y is None else np.asarray(y, dtype=float)
    num_time_steps = int(T_max / dt)

    # Coordinates in the same (i outer, j inner) order as T.ravel().
    xs = np.repeat(x, n_y)
    ys = np.tile(y, n_x)
    zs = np.zeros(n_x * n_y)

    snapshots = []
    rows_collected = 0
    for _ in range(num_time_steps):
        if rows_collected >= max_rows:
            break
        core = T[1:-1, 1:-1]
        laplacian = (
            (T[2:, 1:-1] - 2 * core + T[:-2, 1:-1]) / dx**2
            + (T[1:-1, 2:] - 2 * core + T[1:-1, :-2]) / dy**2
        )
        T[1:-1, 1:-1] = core + alpha * dt * laplacian
        T = apply_boundary_conditions(T)  # re-impose the zero edges after each step
        snapshots.append(np.column_stack((xs, ys, zs, T.ravel())))
        rows_collected += xs.size

    table = np.vstack(snapshots)[:max(max_rows, 0)] if snapshots else np.empty((0, 4))
    return pd.DataFrame(table, columns=['x', 'y', 'z', 'Temperature'])

# Run the Case 4 solver from the sinusoidal initial condition and persist the table as CSV.
csv_file_path = "heat_eq_case_4.csv"
dataset_case_4 = solve_heat_equation_case_4(T, alpha, dt, dx, dy, T_max)
dataset_case_4.to_csv(csv_file_path, index=False)

In [6]:
# The four per-case tables written by the solver cells, in case order.
csv_files = [
    "/kaggle/working/heat_eq_case_1.csv",
    "/kaggle/working/heat_eq_case_2.csv",
    "/kaggle/working/heat_eq_case_3.csv",
    "/kaggle/working/heat_eq_case_4.csv",
]

# Stack them into one frame with a fresh 0..n-1 index, then write the result.
final_df = pd.concat(list(map(pd.read_csv, csv_files)), ignore_index=True)
final_csv_path = "/kaggle/working/final_heat_eq.csv"
final_df.to_csv(final_csv_path, index=False)

print(f"Final dataset saved: {final_csv_path}")

Final dataset saved: /kaggle/working/final_heat_eq.csv


In [7]:
import pandas as pd

# Read the merged solver table back in and show its first three rows.
data_path = "/kaggle/working/final_heat_eq.csv"
df = pd.read_csv(data_path)
print(df.iloc[:3])

     x         y    z  Temperature
0  0.0  0.000000  0.0          0.0
1  0.0  0.025641  0.0          0.0
2  0.0  0.051282  0.0          0.0


In [8]:
# Turn each table row into a question/answer pair for fine-tuning.
def _format_prompt(row):
    """Question text built from the row's x, y and z columns."""
    return f"What is the temperature at point ({row['x']}, {row['y']}, {row['z']})?"


def _format_response(temp):
    """Answer text built from the row's Temperature value."""
    return f"The temperature is {temp}."


df["prompt"] = df.apply(_format_prompt, axis=1)
df["response"] = df["Temperature"].apply(_format_response)

# Keep only the two text columns in the saved file.
df[["prompt", "response"]].to_csv("heat_eq_finetune.csv", index=False)
print("Fine-tuning CSV saved: heat_eq_finetune.csv")

Fine-tuning CSV saved: heat_eq_finetune.csv


In [9]:
import pandas as pd

# Read the prompt/response file back in and show its first three rows.
data_path = "/kaggle/working/heat_eq_finetune.csv"
df = pd.read_csv(data_path)
print(df.iloc[:3])

                                              prompt                 response
0  What is the temperature at point (0.0, 0.0, 0.0)?  The temperature is 0.0.
1  What is the temperature at point (0.0, 0.02564...  The temperature is 0.0.
2  What is the temperature at point (0.0, 0.05128...  The temperature is 0.0.


## 2. Data Scraping and Dataset Generation from given VTK Input file

In [10]:
!pip install pyvista

Collecting pyvista
  Downloading pyvista-0.44.2-py3-none-any.whl.metadata (15 kB)
Downloading pyvista-0.44.2-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pyvista
Successfully installed pyvista-0.44.2


In [11]:
import pyvista as pv

# Read the Case 1 VTK file and print pyvista's summary of the resulting mesh.
vtk_file_path = (
    "/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case1.vtk"
)
mesh = pv.read(vtk_file_path)
print(mesh)

StructuredGrid (0x796ec6b44d60)
  N Cells:      9801
  N Points:     10000
  X Bounds:     0.000e+00, 1.000e+00
  Y Bounds:     0.000e+00, 1.000e+00
  Z Bounds:     0.000e+00, 0.000e+00
  Dimensions:   100, 100, 1
  N Arrays:     1


In [12]:
import numpy as np

# Attribute containers attached to the mesh (per-cell and per-point arrays).
cell_data = mesh.cell_data
point_data = mesh.point_data

# Point coordinates as a plain NumPy array.
points = np.array(mesh.points)

print("Points:\n", points)
print("Point Data:\n", point_data)

Points:
 [[0.         0.         0.        ]
 [0.         0.01010101 0.        ]
 [0.         0.02020202 0.        ]
 ...
 [1.         0.97979798 0.        ]
 [1.         0.98989899 0.        ]
 [1.         1.         0.        ]]
Point Data:
 pyvista DataSetAttributes
Association     : POINT
Active Scalars  : Temperature
Active Vectors  : None
Active Texture  : None
Active Normals  : None
Contains arrays :
    Temperature             float64    (10000,)             SCALARS


In [14]:
import pyvista as pv
import pandas as pd
import numpy as np

In [16]:
# Case 1: flatten the VTK mesh into a table of point coordinates plus point arrays.
vtk_file_path = "/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case1.vtk"
mesh = pv.read(vtk_file_path)

# Point coordinates
points = np.array(mesh.points)

# Every array stored on the mesh points, keyed by array name
point_data = {}
for name in mesh.point_data.keys():
    point_data[name] = mesh.point_data[name]

# One row per point: coordinates first, then one column per point array
df = pd.DataFrame(points, columns=["x", "y", "z"])
for name in point_data:
    df[name] = point_data[name]

csv_file_path = "Case1vtk_extracted_data.csv"
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved: {csv_file_path}")

CSV file saved: Case1vtk_extracted_data.csv


In [17]:
# Case 2: flatten the VTK mesh into a table of point coordinates plus point arrays.
vtk_file_path = "/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case2.vtk"
mesh = pv.read(vtk_file_path)

# Point coordinates
points = np.array(mesh.points)

# Every array stored on the mesh points, keyed by array name
point_data = {}
for name in mesh.point_data.keys():
    point_data[name] = mesh.point_data[name]

# One row per point: coordinates first, then one column per point array
df = pd.DataFrame(points, columns=["x", "y", "z"])
for name in point_data:
    df[name] = point_data[name]

csv_file_path = "Case2vtk_extracted_data.csv"
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved: {csv_file_path}")

CSV file saved: Case2vtk_extracted_data.csv


In [18]:
# Case 3: flatten the VTK mesh into a table of point coordinates plus point arrays.
vtk_file_path = "/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case3.vtk"
mesh = pv.read(vtk_file_path)

# Point coordinates
points = np.array(mesh.points)

# Every array stored on the mesh points, keyed by array name
point_data = {}
for name in mesh.point_data.keys():
    point_data[name] = mesh.point_data[name]

# One row per point: coordinates first, then one column per point array
df = pd.DataFrame(points, columns=["x", "y", "z"])
for name in point_data:
    df[name] = point_data[name]

csv_file_path = "Case3vtk_extracted_data.csv"
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved: {csv_file_path}")

CSV file saved: Case3vtk_extracted_data.csv


In [19]:
# Case 4: flatten the VTK mesh into a table of point coordinates plus point arrays.
vtk_file_path = "/kaggle/input/fine-tuning-lm-physical-interpretation-hackathon/Case4.vtk"
mesh = pv.read(vtk_file_path)

# Point coordinates
points = np.array(mesh.points)

# Every array stored on the mesh points, keyed by array name
point_data = {}
for name in mesh.point_data.keys():
    point_data[name] = mesh.point_data[name]

# One row per point: coordinates first, then one column per point array
df = pd.DataFrame(points, columns=["x", "y", "z"])
for name in point_data:
    df[name] = point_data[name]

csv_file_path = "Case4vtk_extracted_data.csv"
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved: {csv_file_path}")

CSV file saved: Case4vtk_extracted_data.csv


## 3. Data Preprocessing and Pipeline

In [20]:
# The four per-case tables extracted from the VTK files, in case order.
csv_files = [
    "/kaggle/working/Case1vtk_extracted_data.csv",
    "/kaggle/working/Case2vtk_extracted_data.csv",
    "/kaggle/working/Case3vtk_extracted_data.csv",
    "/kaggle/working/Case4vtk_extracted_data.csv",
]

# Stack them into one frame with a fresh 0..n-1 index, then write the result.
final_df = pd.concat(list(map(pd.read_csv, csv_files)), ignore_index=True)
final_csv_path = "/kaggle/working/final_vtk_dataset.csv"
final_df.to_csv(final_csv_path, index=False)

print(f"Final dataset saved: {final_csv_path}")

Final dataset saved: /kaggle/working/final_vtk_dataset.csv


In [21]:
import pandas as pd

# Read the merged VTK table back in and show its first three rows.
data_path = "/kaggle/working/final_vtk_dataset.csv"
df = pd.read_csv(data_path)
print(df.iloc[:3])

     x         y    z  Temperature
0  0.0  0.000000  0.0         -0.0
1  0.0  0.010101  0.0         -0.0
2  0.0  0.020202  0.0         -0.0


In [22]:
# Show the last three rows of the table loaded in the previous cell.
print(df.tail(3))

         x         y    z   Temperature
39997  1.0  0.979798  0.0  7.037214e-18
39998  1.0  0.989899  0.0  3.520380e-18
39999  1.0  1.000000  0.0  1.358809e-32


In [23]:
# Turn each table row into a question/answer pair for fine-tuning.
def _format_prompt(row):
    """Question text built from the row's x, y and z columns."""
    return f"What is the temperature at point ({row['x']}, {row['y']}, {row['z']})?"


def _format_response(temp):
    """Answer text built from the row's Temperature value."""
    return f"The temperature is {temp}."


df["prompt"] = df.apply(_format_prompt, axis=1)
df["response"] = df["Temperature"].apply(_format_response)

# Keep only the two text columns in the saved file.
df[["prompt", "response"]].to_csv("heat_equation_finetune.csv", index=False)
print("Fine-tuning CSV saved: heat_equation_finetune.csv")

Fine-tuning CSV saved: heat_equation_finetune.csv


In [24]:
import pandas as pd

# Read the prompt/response file back in and show its first three rows.
data_path = "/kaggle/working/heat_equation_finetune.csv"
df = pd.read_csv(data_path)
print(df.iloc[:3])

                                              prompt                  response
0  What is the temperature at point (0.0, 0.0, 0.0)?  The temperature is -0.0.
1  What is the temperature at point (0.0, 0.01010...  The temperature is -0.0.
2  What is the temperature at point (0.0, 0.02020...  The temperature is -0.0.


In [25]:
# consider only Case1 VTK file data:
def solve_heat_equation_case1(nx=50, ny=50, n_iterations=1000, output_path="Case1_FDM.vtk"):
    """Solve the Case 1 Poisson problem with Jacobi iteration and save it as VTK.

    Solves lap(T) = 8*pi^2 * sin(2*pi*x) * sin(2*pi*y) on the unit square with
    T = 0 on all four edges, then writes a structured grid carrying a
    ``Temperature`` point array to ``output_path``.

    Parameters
    ----------
    nx, ny : int
        Number of grid points along x and y.
    n_iterations : int
        Number of Jacobi sweeps (fixed count; no convergence test is applied).
    output_path : str
        Destination of the VTK file.
    """
    dx, dy = 1 / (nx - 1), 1 / (ny - 1)
    x = np.linspace(0, 1, nx)
    y = np.linspace(0, 1, ny)
    X, Y = np.meshgrid(x, y)  # shape (ny, nx): T[j, i] <-> (x[i], y[j])

    def force_function(x, y):
        return 8 * np.pi**2 * np.sin(2 * np.pi * x) * np.sin(2 * np.pi * y)

    # Zero everywhere; the edges are never updated, so they stay at the boundary value 0.
    T = np.zeros((ny, nx))

    rhs = force_function(X, Y)[1:-1, 1:-1]
    # Five-point Jacobi update for unequal spacings; with dx == dy this reduces
    # to 0.25 * (sum of the four neighbours - dx**2 * f).
    denominator = 2 * (dx**2 + dy**2)
    for _ in range(n_iterations):
        T_new = T.copy()
        T_new[1:-1, 1:-1] = (
            dy**2 * (T[1:-1, 2:] + T[1:-1, :-2])
            + dx**2 * (T[2:, 1:-1] + T[:-2, 1:-1])
            - dx**2 * dy**2 * rhs
        ) / denominator
        T = T_new

    grid = pv.StructuredGrid(X, Y, np.zeros_like(X))
    # StructuredGrid stores its points in Fortran order, so the values must be
    # flattened the same way to stay attached to the right coordinates.
    grid["Temperature"] = T.ravel(order="F")
    grid.save(output_path)

# Generate the Case 1 finite-difference solution file, then read it back.
solve_heat_equation_case1()
grid = pv.read("Case1_FDM.vtk")

# Point coordinates and the temperature array stored on the grid
temperature = grid["Temperature"]
points = grid.points

# Tabulate x, y and temperature (the z coordinate is dropped) and save as CSV.
data = pd.DataFrame(
    {
        "x": points[:, 0],
        "y": points[:, 1],
        "temperature": temperature,
    }
)
data.to_csv("Case1_FDM.csv", index=False)

## 4. Installing Required packages

In [26]:
!pip install transformers==4.46.0 datasets peft bitsandbytes accelerate torch

Collecting transformers==4.46.0
  Downloading transformers-4.46.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers==4.46.0)
  Downloading tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Reason for being yanked: This version unfortunately does not work with 3.8 but we did not drop the support yet[0m[33m
[0mDownloading transformers-4.46.0-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m59.0 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hDownloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m24.6 MB/s[0

In [27]:
# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import Dataset
from peft import get_peft_model, TaskType
from peft import LoraConfig, get_peft_model  # LoRA fine-tuning
import torch
import gc
import os
from accelerate import Accelerator
# Set the PyTorch CUDA allocator option "expandable_segments" for this process.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): cpu=True requests CPU execution from Accelerate, yet the model below is
# loaded with device_map="auto" / 4-bit quantization and trained with fp16=True — confirm
# this setting is intended.
accelerator = Accelerator(cpu=True)

## 5. Building LLM Pipeline

In [29]:
# Model identifier passed to both AutoTokenizer.from_pretrained and
# AutoModelForCausalLM.from_pretrained in the cells below.
model_path = "ibm-granite/granite-3.1-8b-instruct"

In [None]:
# ✅ Auto-detect the best available device (GPU preferred)
#device = "cuda" if torch.cuda.is_available() else "cpu"

In [30]:
# Load the tokenizer that belongs to `model_path`.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse the end-of-sequence token as the padding token so padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to avoid errors

tokenizer_config.json:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/777k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/442k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.48M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

In [31]:
# Load the model with 4-bit quantization to save memory.
# The bare `load_in_4bit=` keyword is deprecated (see the warning this call used to emit);
# the supported route is a BitsAndBytesConfig passed as `quantization_config`.
from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

config.json:   0%|          | 0.00/790 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/29.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.41G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [34]:
# Turn on gradient checkpointing for the loaded model before the LoRA adapter is attached.
model.gradient_checkpointing_enable()

In [35]:
# LoRA (Low-Rank Adaptation) settings: only small adapter matrices on the listed
# attention projections are trained.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],  # attention projections that receive adapters
    r=4,                                  # adapter rank
    lora_alpha=16,                        # scaling factor
    lora_dropout=0.01,
)


In [36]:
# The base model is 4-bit quantized and uses gradient checkpointing. PEFT's
# prepare_model_for_kbit_training makes such a model trainable (it enables input
# gradients so checkpointed, frozen layers still propagate gradients to the adapters)
# and should run before the adapter is attached.
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [37]:
# Training data: the Case 1 finite-difference table (columns x, y, temperature)
# written earlier to Case1_FDM.csv, wrapped as a Hugging Face Dataset.
data = pd.read_csv("Case1_FDM.csv")
dataset = Dataset.from_pandas(data)

In [None]:
# Load dataset
#dataset = load_dataset("csv", data_files="heat_equation_finetune.csv", split="train[:3%]")
#print((dataset[120]))  # Prints the number of rows in the dataset
#print(len(dataset))  # Prints the number of rows in the dataset

In [38]:
# Tokenization function
def preprocess_function(examples):
    """Tokenize a batch for causal-LM training.

    Each entry of the batch's "temperature" column is converted to its string form
    (the batch column is replaced by those strings) and tokenized with truncation at
    512 tokens. The labels are a copy of the input ids.

    NOTE(review): only the temperature value is tokenized; the x/y coordinates never
    reach the model. Confirm that this is the intended training text.
    """
    temperature_text = list(map(str, examples["temperature"]))
    examples["temperature"] = temperature_text

    encoded = tokenizer(temperature_text, truncation=True, max_length=512)
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

In [39]:
# Apply preprocess_function to the dataset in batches; the result gains
# input_ids / attention_mask / labels alongside the original columns.
tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

In [40]:
# Release cached CUDA memory held by PyTorch, then run Python's garbage collector
# (the cell displays the integer gc.collect() returns).
torch.cuda.empty_cache()
gc.collect()

91

In [41]:
# DataCollatorWithPadding pads input_ids/attention_mask but leaves the variable-length
# "labels" lists ragged, so any batch size above 1 fails when tensors are built.
# DataCollatorForSeq2Seq pads the labels as well, using -100 so the padded positions
# are ignored by the loss.
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=-100)

## 6. Generate training and validation datasets

In [None]:
# Training split: shuffle with a fixed seed (42) and keep the first 80% of the rows.
# split_idx is the 80% boundary and is reused when the evaluation split is built.
split_idx = int(0.8 * len(tokenized_dataset))
train_data = tokenized_dataset.shuffle(seed=42).select(range(split_idx))

In [None]:
# train_data is the first 80% of the seed-42 shuffle, so the evaluation rows must come
# from the SAME permutation. Selecting from the unshuffled dataset would overlap with
# the training rows instead of complementing them.
eval_data = tokenized_dataset.shuffle(seed=42).select(range(split_idx, len(tokenized_dataset)))

## 7. Implement a fine-tuning methodology-using the prepared datasets

In [42]:
# Hyper-parameters and bookkeeping for the Trainer.
training_args = TrainingArguments(
    # Output locations and run label
    output_dir="./granite_finetuned",
    logging_dir="./logs",
    run_name="granite_experiment",
    # Batch geometry: 1 sample per device, gradients accumulated over 8 steps
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    # per_device_eval_batch_size=8,  (evaluation is currently disabled)
    # Optimisation
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,  # mixed-precision training
    # Checkpointing and logging cadence
    save_strategy="epoch",
    logging_steps=10,
)

In [43]:
# Build the Trainer from the LoRA-wrapped model, the training split and the collator.
from transformers import Trainer

trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_data,
    data_collator=data_collator,
    # eval_dataset=eval_data  (evaluation is currently disabled)
)
trainer = Trainer(**trainer_kwargs)

In [44]:
# Pass the model and the Trainer through the Accelerator created in the imports cell.
# NOTE(review): that Accelerator was built with cpu=True and a Trainer is not a
# model/optimizer/dataloader — confirm this call is needed at all.
model, trainer = accelerator.prepare(model, trainer)

In [None]:
# Run fine-tuning with the Trainer configured above.
trainer.train()

In [None]:
# Save the fine-tuned model and its tokenizer side by side in ./fine_tuned_model,
# the directory load_model() reads from.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")


# 🚀 Model Inference Function

In [None]:
def load_model():
    """Load the tokenizer and model saved in ``fine_tuned_model`` for inference.

    Returns:
        tuple: ``(model, tokenizer)`` with the model switched to eval mode.
    """
    checkpoint_dir = "fine_tuned_model"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    # .eval() returns the module itself, so it can be chained onto the load.
    model = AutoModelForCausalLM.from_pretrained(checkpoint_dir, device_map="auto").eval()
    return model, tokenizer

In [None]:
def get_model_response(model, tokenizer, prompt, max_tokens=300):
    """Generate a response from the fine-tuned model.

    Args:
        model: Causal LM exposing ``.device`` and ``.generate``.
        tokenizer: Matching tokenizer (callable, with ``decode`` and ``eos_token_id``).
        prompt: Question text; it is wrapped as ``"Question: ...\\n\\nAnswer:"``.
        max_tokens: Maximum number of newly generated tokens.

    Returns:
        str: The generated continuation only, stripped of surrounding whitespace.
    """
    input_text = f"Question: {prompt}\n\nAnswer:"
    input_tokens = tokenizer(input_text, return_tensors="pt").to(model.device)

    output = model.generate(
        **input_tokens,
        max_new_tokens=max_tokens,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    # Drop the prompt by token count rather than by character count: decoding does not
    # always reproduce the prompt text exactly, which would shift a character slice.
    prompt_length = input_tokens["input_ids"].shape[1]
    generated_ids = output[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return response.strip()

In [None]:
def model_inference():
    """Ask the fine-tuned model every hackathon question and collect the answers.

    Returns:
        list[str]: One entry per question, in the order listed below. A question whose
        generation raises is recorded as ``"Error: <message>"`` so the list always has
        one entry per question.
    """
    model, tokenizer = load_model()

    cases = {
        "Case1Q1": "What is the temperature distribution at the corner (0,0) of the unit square mesh?",
        "Case1Q2": "How does the temperature change with respect to x-axis at y=0.5?",
        "Case1Q3": "If we increase the coefficient of pi in the force function, what will happen?",
        "Case2Q1": "Explain why the temperature is zero at both x=0 and x=1, and what this means physically.",
        "Case2Q2": "At what coordinates does the maximum temperature occur, and what determines this location?",
        "Case2Q3": "How does the temperature profile change along x=0.5 compared to x=0.25?",
        "Case3Q1": "What is the temperature at the corner (0,0) of the unit square mesh?",
        "Case3Q2": "What physical significance does the boundary condition u(0,y)=0 have?",
        "Case3Q3": "What does the boundary condition u(1,y)=y(1−y) represent physically?",
        "Case4Q1": "What can you infer about the decay rate of temperature?",
        "Case4Q2": "Why does the spatial pattern remain unchanged while only the amplitude decreases with time?",
        "Case4Q3": "What is the effect of alpha on the decay rate of heat dissipation?"
    }

    results = []
    for case in cases:
        prompt = cases[case]
        print(f"\nTesting {case}...")
        try:
            response = get_model_response(model, tokenizer, prompt)
        except Exception as e:
            # Best effort: keep going and store the failure in this question's slot.
            print(f"Error in {case}: {e}")
            results.append(f"Error: {e}")
            continue
        results.append(response)
        print(f"\nResponse for {case}:")
        print(response)

    return results

# 🚀 Run Inference and Save Results

In [None]:
# Run inference for all questions. `results` was previously never assigned, because
# model_inference() was defined but not called.
results = model_inference()

# Save results as CSV: one row per answer, with ids starting at 1.
df = pd.DataFrame({"Id": list(range(1, len(results) + 1)), "Answer": results})
df.to_csv('submission.csv', index=False)

# 🚀 Evaluation Metrics (BLEU & ROUGE)

In [None]:
!pip install sacrebleu rouge-score

In [None]:
# Load BLEU and ROUGE metrics. The import lives in this cell so it runs before first use
# on a fresh kernel.
# NOTE(review): `load_metric` is deprecated in the datasets library; confirm the
# installed version still provides it.
from datasets import load_metric

bleu_metric = load_metric("sacrebleu")
rouge_metric = load_metric("rouge")

In [None]:
from datasets import load_metric

In [None]:
# Load the sacreBLEU and ROUGE metric objects used by the scoring cell below
# (same two calls as the earlier metric-loading cell).
bleu_metric = load_metric("sacrebleu")
rouge_metric = load_metric("rouge")

In [None]:
# Reference answers used to score the model's responses: twelve strings,
# one per question asked in model_inference(), in the same order.
reference_answers = [
    "The temperature at (0,0) is 0 degrees.",
    "The temperature along x-axis at y=0.5 increases quadratically, ranging from 0.25 to 1.25.",
    "Increasing the coefficient of π results in higher overall temperatures throughout the domain.",
    "The temperature is zero at x=0 and x=1 because of Dirichlet boundary conditions, meaning the boundaries are held at a fixed temperature.",
    "The maximum temperature occurs at (1,1) because it is the highest sum of x² + y².",
    "The temperature profile at x=0.5 is symmetric about y=0.5, with a parabolic variation.",
    "The temperature at (0,0) is 0 degrees.",
    "The boundary condition u(0,y)=0 ensures no heat flux at the left boundary.",
    "The boundary condition u(1,y)=y(1−y) represents a parabolic temperature distribution at x=1.",
    "The temperature decay rate follows an exponential pattern due to the heat dissipation properties of the material.",
    "The spatial pattern remains unchanged while amplitude decreases because the system reaches a self-similar equilibrium.",
    "Increasing alpha accelerates heat dissipation, leading to a faster decay in temperature."
]


In [None]:
# Convert responses into evaluation format
predictions = results  # Model-generated responses
if len(predictions) != len(reference_answers):
    raise ValueError(
        f"Expected {len(reference_answers)} predictions, got {len(predictions)}"
    )

# sacreBLEU takes a list of reference lists (several references per prediction allowed);
# the ROUGE metric takes exactly one reference string per prediction.
references = [[ref] for ref in reference_answers]

# Compute BLEU and ROUGE Scores
bleu_score = bleu_metric.compute(predictions=predictions, references=references)
rouge_score = rouge_metric.compute(predictions=predictions, references=reference_answers)

# Print evaluation results
print(f"\nBLEU Score: {bleu_score['score']:.2f}")
print(f"ROUGE-L Score: {rouge_score['rougeL'].mid.fmeasure:.2f}")

# 🚀 Reporting File

#### https://github.com/sajibhalder/FineTune-LLM-Heat-Equation-Solution