In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
from ucimlrepo import fetch_ucirepo

In [3]:
pip install pandas scipy

Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
from scipy.stats import kurtosis, skew

In [5]:
# Fetch the dataset from the UCI ML Repository. id=291 is the
# "Airfoil Self-Noise" dataset (see 'uci_id' / 'name' in the metadata printed below).
airfoil_self_noise = fetch_ucirepo(id=291)

In [6]:
# Pull the feature table (X) and the target table (y) out of the fetched
# dataset; both are provided as pandas DataFrames.
X, y = (
    airfoil_self_noise.data.features,
    airfoil_self_noise.data.targets,
)

In [7]:
# Print the dataset-level metadata record (id, name, URLs, abstract, task,
# instance/feature counts, target column, creators, ...).
print(airfoil_self_noise.metadata)

{'uci_id': 291, 'name': 'Airfoil Self-Noise', 'repository_url': 'https://archive.ics.uci.edu/dataset/291/airfoil+self+noise', 'data_url': 'https://archive.ics.uci.edu/static/public/291/data.csv', 'abstract': 'NASA data set, obtained from a series of aerodynamic and acoustic tests of two and three-dimensional airfoil blade sections conducted in an anechoic wind tunnel.', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 1503, 'num_features': 5, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['scaled-sound-pressure'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1989, 'last_updated': 'Fri Mar 29 2024', 'dataset_doi': '10.24432/C5VW2C', 'creators': ['Thomas Brooks', 'D. Pope', 'Michael Marcolini'], 'intro_paper': None, 'additional_info': {'summary': 'The NASA data set comprises different size NACA 0012 airfoils at various wind tunnel speeds and angles of

In [8]:
# Print the per-variable table: name, role (Feature/Target), type, units,
# and whether the variable has missing values.
print(airfoil_self_noise.variables)

                                  name     role        type demographic  \
0                            frequency  Feature     Integer        None   
1                         attack-angle  Feature      Binary        None   
2                         chord-length  Feature  Continuous        None   
3                 free-stream-velocity  Feature  Continuous        None   
4  suction-side-displacement-thickness  Feature  Continuous        None   
5                scaled-sound-pressure   Target  Continuous        None   

  description units missing_values  
0        None    Hz             no  
1        None   deg             no  
2        None     m             no  
3        None   m/s             no  
4        None     m             no  
5        None    dB             no  


In [9]:
# Place the target column(s) next to the feature columns so that every
# variable lives in one DataFrame (rows are matched on the index).
data = pd.concat([X, y], axis="columns")

In [14]:
# Per-column summary statistics, keyed by the statistic's display name.
# The pandas reductions each return a Series indexed by column name; the
# scipy functions are applied down the rows (axis=0) and yield one value per
# column, in column order.
# NOTE(review): pandas var()/std() default to the sample estimator (ddof=1),
# while scipy's skew()/kurtosis() default to the biased estimator (bias=True).
descriptive_stats = {
    "Mean": data.mean(),
    "Variance": data.var(),
    "Standard Deviation": data.std(),
    "Median": data.median(),
    # fisher=False selects Pearson's definition of kurtosis.
    "Kurtosis": kurtosis(data, axis=0, fisher=False),
    "Skewness": skew(data, axis=0),
    "Range": data.max().sub(data.min()),
}

In [17]:
# Assemble the statistics into a single DataFrame for readability:
# one row per variable, one column per statistic.
stats_df = pd.DataFrame(data=descriptive_stats)
print(stats_df)

                                            Mean      Variance  \
frequency                            2886.380572  9.938717e+06   
attack-angle                            6.782302  3.502424e+01   
chord-length                            0.136548  8.749868e-03   
free-stream-velocity                   50.860745  2.425116e+02   
suction-side-displacement-thickness     0.011140  1.729287e-04   
scaled-sound-pressure                 124.835943  4.759146e+01   

                                     Standard Deviation       Median  \
frequency                                   3152.573137  1600.000000   
attack-angle                                   5.918128     5.400000   
chord-length                                   0.093541     0.101600   
free-stream-velocity                          15.572784    39.600000   
suction-side-displacement-thickness            0.013150     0.004957   
scaled-sound-pressure                          6.898657   125.721000   

                                