Load data from a CSV file into a DataFrame.

In [None]:
import pandas as pd
# Location of the input file, resolved relative to the current working directory.
csv_path = 'data.csv'
# Parse the CSV into a DataFrame that the later cells read from.
data = pd.read_csv(csv_path)

Create a histogram of the specified column in the data.

In [None]:
import matplotlib.pyplot as plt
# Pull the column out once so the plotting call reads cleanly.
column_values = data['column_name']
# Raw-count histogram (no density normalisation) using 10 bins.
plt.hist(column_values, bins=10)
plt.show()

Estimate parameters for a normal distribution fit.

In [None]:
import scipy.stats as stats
# Fit a normal distribution to the column. The returned tuple is unpacked
# positionally into stats.norm.pdf later on (for norm this is loc, scale
# per the SciPy docs).
column_values = data['column_name']
params = stats.norm.fit(column_values)

Generate the probability density function (PDF) for the fitted distribution.

In [None]:
import numpy as np
# Grid of 100 evenly spaced points covering the observed range of the column.
column_values = data['column_name']
x = np.linspace(column_values.min(), column_values.max(), num=100)
# Evaluate the fitted normal density on that grid; params supplies the
# distribution arguments obtained from stats.norm.fit.
_pdf = stats.norm.pdf(x, *params)

Evaluate fit by comparing the PDF with the histogram.

In [None]:
# Draw on an explicit Axes so both layers are clearly tied to the same plot.
fig, ax = plt.subplots()
# density=True rescales the histogram to unit area so it is comparable to a PDF;
# alpha=0.5 keeps the overlaid curve visible through the bars.
ax.hist(data['column_name'], bins=10, density=True, alpha=0.5)
# Fitted normal PDF as a solid red line.
ax.plot(x, _pdf, 'r-')
plt.show()

Review and adjust the data by filtering out outliers, keeping only the rows whose value is below an upper threshold.

In [None]:
# `threshold` has to exist before the filter runs. As a default, use the
# conventional "mean + 3 standard deviations" upper cutoff; tune this value
# to suit the data being analysed.
threshold = data['column_name'].mean() + 3 * data['column_name'].std()
# Keep only rows strictly below the cutoff. Rows where the column is NaN fail
# the comparison and are therefore dropped as well.
data_cleaned = data[data['column_name'] < threshold]

Perform non-parametric estimation using Kernel Density Estimation (KDE).

In [None]:
from sklearn.neighbors import KernelDensity
# Fit on a plain (n_samples, 1) NumPy array instead of a DataFrame: the model
# is later scored with NumPy arrays, and fitting with DataFrame column names
# would make scikit-learn warn about missing feature names at scoring time.
train_values = data_cleaned[['column_name']].to_numpy()
# Gaussian kernel with a fixed bandwidth of 0.5 (in the units of the column).
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(train_values)

Evaluate the fitted KDE model on an evenly spaced grid to obtain density estimates.

In [None]:
# Evaluation grid: 1000 evenly spaced points across the cleaned column's range.
cleaned_values = data_cleaned['column_name']
x_d = np.linspace(cleaned_values.min(), cleaned_values.max(), 1000)
# score_samples expects a 2-D (n_samples, 1) input and returns log-density
# values (per the scikit-learn docs), so exponentiate to recover the density.
log_density = kde.score_samples(x_d.reshape(-1, 1))
density = np.exp(log_density)

Compare the KDE with the histogram of the cleaned data.

In [None]:
# Draw on an explicit Axes so both layers are clearly tied to the same plot.
fig, ax = plt.subplots()
# Unit-area histogram of the cleaned column, semi-transparent so the KDE
# curve drawn on top stays readable.
ax.hist(data_cleaned['column_name'], bins=10, density=True, alpha=0.5)
# KDE estimate evaluated on the x_d grid, as a solid red line.
ax.plot(x_d, density, 'r-')
plt.show()