In [10]:

 pip install ollama


Note: you may need to restart the kernel to use updated packages.


In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ollama

In [4]:
# load dataset
url = r"C:\Users\Hanshu\Desktop\excel data\Iris.csv"
df = pd.read_csv(url)
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [12]:
# display dataset info
print(df.describe())


               Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
count  150.000000     150.000000    150.000000     150.000000    150.000000
mean    75.500000       5.843333      3.054000       3.758667      1.198667
std     43.445368       0.828066      0.433594       1.764420      0.763161
min      1.000000       4.300000      2.000000       1.000000      0.100000
25%     38.250000       5.100000      2.800000       1.600000      0.300000
50%     75.500000       5.800000      3.000000       4.350000      1.300000
75%    112.750000       6.400000      3.300000       5.100000      1.800000
max    150.000000       7.900000      4.400000       6.900000      2.500000


In [13]:
# missing values check
print("\nMissing Values:\n", df.isnull().sum())


Missing Values:
 Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64


In [7]:
import ollama

def generate_insights(df_summary):                                                                         # wuth def keyword i created a fun
    prompt = f'analyze the dataset summary and provide insights:\n\n{df_summary}'
    response = ollama.chat(model = 'gemma:2b', messages=[{'role':'user' , 'content': prompt}])
    return response['message']['content']

# generate AI Insights
summary = df.describe().to_string()                                #describe()--> descriptive statistics        # to_string() --> converting every thin to the string 
insights = generate_insights(summary)
print('\n* AI-Generated Insights:\n' , insights)


* AI-Generated Insights:
 ## Insights from the Dataset Summary

**Overall:**

* The dataset contains information on 150 flower specimens.
* The data covers a wide range of features, including:
    * **Petal length and width**: These features are relatively consistent, with a mean of 5.8 and standard deviation of 0.8.
    * **Sepal length and width**: These features have a larger variability, with a mean of 75.5 and standard deviation of 43.4.
    * **Petal color**: This feature has a relatively lower mean of 1.3 compared to the other features.
    * **Sepal length**: This feature has a higher mean of 75.5 compared to other features.
    * **Sepal width**: This feature has a lower mean of 3.0 compared to other features.

**Additional observations:**

* The minimum and 25th percentile values indicate that the smallest petals are 1 cm long and the smallest sepal length is 4.3 cm.
* The 75th percentile and maximum values indicate that the largest petals are 6.9 cm long and the largest sep

In [14]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ollama


# function to perform EDA and Generate visualization
def eda_analysis(file_path):
    df = pd.read_csv(file_path)   # my datset read file path (app.py(in EDA LLM Folder ))
    
    # fill missing values with median for numeric columns
    for col in df.select_dtypes(include=['number']).columns:          # for col--> for entire column
        df[col].fillna(df[col].median(), inplace=True)
        
        
    # fill missing values with median for categorical columns
    for col in df.select_dtypes(include=['object']).columns:
        df[col].fillna(df[col].mode()[0] , inplace=True)
            
     # data summary
    summary = df.describe(include='all').to_string()
    
    
    # missing values
    missing_values = df.isnull().sum().to_string() 
    
    # Generate AI Insights
    insights = generate_ai_insights(summary) 
    
    #Generate data viz
    plot_paths = generate_visualizations(df)
    
    return f"\n Data Loaded Successfully!\n\n Summary:\n{summary}\n\n Missing Values:\n{missing_values}\n\n AI Insights:\n{insights}", plot_paths

# AI-Powered Insights using Mistral-7B (Ollama)
def generate_ai_insights(df_summary):
    prompt = f"Analyze the dataset summary and provide insights:\n\n{df_summary}"
    response = ollama.chat(model="gemma:2b", messages=[{"role": "user", "content": prompt}])
    return response['message']['content']

# Function to Generate Data Visualizations
def generate_visualizations(df):
    plot_paths = []
    
    # Histograms for Numeric Columns
    for col in df.select_dtypes(include=['number']).columns:
        plt.figure(figsize=(6,4))
        sns.histplot(df[col], bins=30, kde=True, color="blue")
        plt.title(f"Distribution of {col}")
        path = f"{col}_distribution.png"
        plt.savefig(path)
        plot_paths.append(path)
        plt.close()
    
    # Correlation Heatmap (only numeric columns)
    numeric_df = df.select_dtypes(include=['number'])
    if not numeric_df.empty:
        plt.figure(figsize=(8,5))
        sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
        plt.title("Correlation Heatmap")
        path = "correlation_heatmap.png"
        plt.savefig(path)
        plot_paths.append(path)
        plt.close()

    return plot_paths

# Gradio Interface
demo = gr.Interface(
    fn=eda_analysis,
    inputs=gr.File(type="filepath"),
    outputs=[gr.Textbox(label="EDA Report"), gr.Gallery(label="Data Visualizations")],
    title="📊 LLM-Powered Exploratory Data Analysis (EDA)",
    description="Upload any dataset CSV file and get automated EDA insights with AI-powered analysis and visualizations."
)

# Launch the Gradio App
demo.launch(share=True)

            
            
            
        
        

* Running on local URL:  http://127.0.0.1:7862

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0] , inplace=True)
