# Statistical Tools Using Pandas 

In [1]:
import pandas as pd

# Toy dataset: each keyword is a column name mapped to its five observations.
data = dict(
    Age=[28, 24, 22, 25, 29],
    Height=[165, 180, 155, 172, 168],
    Weight=[58, 75, 50, 65, 60],
)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Build the working DataFrame: dictionary keys become column labels and
# each list becomes that column's values.
df = pd.DataFrame(data=data)

### describe(): Generates descriptive statistics of the DataFrame, 
### providing the count, mean, standard deviation, minimum, maximum, and quartiles (25%, 50%, 75%) for each numerical column.

In [3]:
# Descriptive statistics for every numeric column of df.
# A single print call with a newline separator emits the heading and the
# table on consecutive lines.
print("Summary Statistics:", df.describe(), sep="\n")

Summary Statistics:
             Age      Height   Weight
count   5.000000    5.000000   5.0000
mean   25.600000  168.000000  61.6000
std     2.880972    9.192388   9.2358
min    22.000000  155.000000  50.0000
25%    24.000000  165.000000  58.0000
50%    25.000000  168.000000  60.0000
75%    28.000000  172.000000  65.0000
max    29.000000  180.000000  75.0000


### 25th Percentile (Q1 or First Quartile):
### The 25th percentile is the value below which 25% of the data falls.
### 50th Percentile (Median):
### The 50th percentile is the middle value of the dataset when it is sorted in ascending order.
### 75th Percentile (Q3 or Third Quartile):
### The 75th percentile is the value below which 75% of the data falls. 


In [4]:
# Echo the raw dictionary so the statistics below can be checked by eye.
print(data)

# Measures of central tendency, computed column by column.
print("\nMean:", df.mean(), sep="\n")
print("\nMedian:", df.median(), sep="\n")
# Every value in this sample occurs exactly once, so mode() lists all of
# them (sorted per column) instead of a single most-frequent value.
print("\nMode:", df.mode(), sep="\n")

{'Age': [28, 24, 22, 25, 29], 'Height': [165, 180, 155, 172, 168], 'Weight': [58, 75, 50, 65, 60]}

Mean:
Age        25.6
Height    168.0
Weight     61.6
dtype: float64

Median:
Age        25.0
Height    168.0
Weight     60.0
dtype: float64

Mode:
   Age  Height  Weight
0   22     155      50
1   24     165      58
2   25     168      60
3   28     172      65
4   29     180      75


In [5]:
# Pairwise correlation coefficients between the columns of df.
print("\nCorrelation:", df.corr(), sep="\n")


Correlation:
             Age   Height    Weight
Age     1.000000  0.16992  0.039462
Height  0.169920  1.00000  0.989410
Weight  0.039462  0.98941  1.000000


### The output above is the correlation matrix of a DataFrame containing three columns: 
###    'Age', 'Height', and 'Weight'. The correlation matrix shows the correlation coefficients between each pair of columns.
### The correlation coefficient measures the strength and direction of a linear relationship between two variables. It ranges from -1 to 1:

### -1: Perfect negative correlation (as one variable increases, the other decreases linearly).
### 0: No linear correlation (variables are not linearly related).
### 1: Perfect positive correlation (as one variable increases, the other increases linearly).
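### The correlation coefficient between 'Age' and 'Height' is approximately 0.16992, which is close to 0, indicating a weak positive correlation. This suggests that there is a slight positive relationship between age and height, but it is not very strong. By contrast, the coefficient between 'Height' and 'Weight' is approximately 0.98941, which is close to 1, indicating a very strong positive linear relationship in this sample, while 'Age' and 'Weight' (approximately 0.039462) show almost no linear relationship.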

In [6]:
# Load the same sample from disk. This expects a file named 'data1.csv',
# resolved relative to the current working directory, holding the same
# data as the dictionary example.
df_csv = pd.read_csv(filepath_or_buffer='data1.csv')

# Show the loaded frame
print(df_csv)

   Age  Height  Weight
0   28     165      58
1   24     180      75
2   22     155      50
3   25     172      65
4   29     168      60


In [7]:
# Descriptive statistics for the CSV-backed frame.
# A single print call with a newline separator emits the heading and the
# table on consecutive lines.
print("Summary Statistics:", df_csv.describe(), sep="\n")

Summary Statistics:
             Age      Height   Weight
count   5.000000    5.000000   5.0000
mean   25.600000  168.000000  61.6000
std     2.880972    9.192388   9.2358
min    22.000000  155.000000  50.0000
25%    24.000000  165.000000  58.0000
50%    25.000000  168.000000  60.0000
75%    28.000000  172.000000  65.0000
max    29.000000  180.000000  75.0000


In [8]:
# Measures of central tendency for the CSV-backed frame, column by column.
print("\nMean:", df_csv.mean(), sep="\n")
print("\nMedian:", df_csv.median(), sep="\n")
# When no value repeats within a column, mode() returns every value of
# that column (sorted) rather than a single row.
print("\nMode:", df_csv.mode(), sep="\n")


Mean:
Age        25.6
Height    168.0
Weight     61.6
dtype: float64

Median:
Age        25.0
Height    168.0
Weight     60.0
dtype: float64

Mode:
   Age  Height  Weight
0   22     155      50
1   24     165      58
2   25     168      60
3   28     172      65
4   29     180      75


In [9]:
# Pairwise correlation coefficients between the columns of the CSV-backed frame.
print("\nCorrelation:", df_csv.corr(), sep="\n")


Correlation:
             Age   Height    Weight
Age     1.000000  0.16992  0.039462
Height  0.169920  1.00000  0.989410
Weight  0.039462  0.98941  1.000000
