In [1]:
import numpy as np

# Coin-toss simulation.
# Encoding: 0 = Tails, 1 = Heads.
num_flips = 10000
# The upper bound of randint is exclusive, so every draw is either 0 or 1.
flips = np.random.randint(low=0, high=2, size=num_flips)

# Peek at the first 10 tosses.
print("First 10 flips:", flips[:10])

# Heads are coded as 1, so summing the array counts the heads.
num_heads = flips.sum()
print(f"\nIn {num_flips} flips, we got {num_heads} heads.")

# Fraction of tosses that came up heads.
proportion_heads = num_heads / num_flips
print(f"The proportion of heads is: {proportion_heads:.4f}")

First 10 flips: [0 1 0 0 1 1 1 1 1 1]

In 10000 flips, we got 5024 heads.
The proportion of heads is: 0.5024


In [2]:
import pandas as pd
import seaborn as sns

# Load the Titanic dataset through seaborn.
titanic_df = sns.load_dataset('titanic')

# Draw a simple random sample of 10 passengers.
# Fixing random_state makes every run return the same sample.
simple_random_sample = titanic_df.sample(n=10, random_state=42)

# Header line (with its trailing blank line) followed by the sampled rows.
print("Simple Random Sample of 10 passengers:\n", simple_random_sample, sep="\n")

Simple Random Sample of 10 passengers:

     survived  pclass     sex   age  sibsp  parch     fare embarked   class  \
709         1       3    male   NaN      1      1  15.2458        C   Third   
439         0       2    male  31.0      0      0  10.5000        S  Second   
840         0       3    male  20.0      0      0   7.9250        S   Third   
720         1       2  female   6.0      0      1  33.0000        S  Second   
39          1       3  female  14.0      1      0  11.2417        C   Third   
290         1       1  female  26.0      0      0  78.8500        S   First   
300         1       3  female   NaN      0      0   7.7500        Q   Third   
333         0       3    male  16.0      2      0  18.0000        S   Third   
208         1       3  female  16.0      0      0   7.7500        Q   Third   
136         1       1  female  19.0      0      2  26.2833        S   First   

       who  adult_male deck  embark_town alive  alone  
709    man        True  NaN    Che

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

# Goal: draw a sample that is stratified by passenger class (pclass).
# Rows with a missing pclass are dropped first so that the stratified split works cleanly.
titanic_clean = titanic_df.dropna(subset=['pclass'])

# Build the StratifiedShuffleSplit object.
# The "test" side of the split is the 10% sample we want.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.10, random_state=42)

# Perform the split. With n_splits=1 the generator yields exactly one
# (train, sample) pair of positional indices, so take it directly.
train_index, sample_index = next(split.split(titanic_clean, titanic_clean['pclass']))
stratified_sample = titanic_clean.iloc[sample_index]

print("Stratified Sample (stratified by 'pclass'):\n")
print(stratified_sample)


def pclass_shares(frame):
    """Return the share of each pclass value in `frame`, rounded to 2 decimals."""
    return frame['pclass'].value_counts(normalize=True).round(2)


# Compare class proportions in the loaded data with those in the sample.
# (value_counts skips NaN by default, so titanic_df and titanic_clean agree here.)
print("\nOriginal Pclass distribution:\n", pclass_shares(titanic_df))
print("\nSample Pclass distribution:\n", pclass_shares(stratified_sample))

Stratified Sample (stratified by 'pclass'):

     survived  pclass     sex   age  sibsp  parch     fare embarked   class  \
697         1       3  female   NaN      0      0   7.7333        Q   Third   
197         0       3    male  42.0      0      1   8.4042        S   Third   
233         1       3  female   5.0      4      2  31.3875        S   Third   
545         0       1    male  64.0      0      0  26.0000        S   First   
77          0       3    male   NaN      0      0   8.0500        S   Third   
..        ...     ...     ...   ...    ...    ...      ...      ...     ...   
437         1       2  female  24.0      2      3  18.7500        S  Second   
0           0       3    male  22.0      1      0   7.2500        S   Third   
17          1       2    male   NaN      0      0  13.0000        S  Second   
97          1       1    male  23.0      0      1  63.3583        C   First   
446         1       2  female  13.0      0      1  19.5000        S  Second   

      

In [4]:
import pandas as pd
import seaborn as sns
from scipy import stats # scipy.stats मॉड्यूल इम्पोर्ट करणे

# Load the Titanic dataset.
titanic_df = sns.load_dataset('titanic')

# Drop rows that are missing the fare or the passenger class.
titanic_df = titanic_df.dropna(subset=['fare', 'pclass'])

# Group 1: fares paid by first-class passengers.
pclass1_fares = titanic_df.loc[titanic_df['pclass'] == 1, 'fare']

# Group 2: fares paid by third-class passengers.
pclass3_fares = titanic_df.loc[titanic_df['pclass'] == 3, 'fare']

# Check the mean of each group.
print(f"Mean fare for Pclass 1: {pclass1_fares.mean():.2f}")
print(f"Mean fare for Pclass 3: {pclass3_fares.mean():.2f}")

# Run the independent-samples t-test.
# equal_var=False because the standard deviations of the two groups are
# likely to be very different.
t_statistic, p_value = stats.ttest_ind(pclass1_fares, pclass3_fares, equal_var=False)

print("\n--- T-test Results ---")
print(f"T-statistic: {t_statistic:.4f}")
print(f"P-value: {p_value}")

# Interpret the result at the 5% significance level.
alpha = 0.05
if p_value < alpha:
    conclusion_lines = (
        "\nConclusion: The p-value is less than alpha.",
        "We reject the Null Hypothesis.",
        "There is a statistically significant difference in the mean fares between Pclass 1 and Pclass 3.",
    )
else:
    conclusion_lines = (
        "\nConclusion: The p-value is greater than alpha.",
        "We fail to reject the Null Hypothesis.",
        "There is no statistically significant difference in the mean fares.",
    )
print("\n".join(conclusion_lines))

Mean fare for Pclass 1: 84.15
Mean fare for Pclass 3: 13.68

--- T-test Results ---
T-statistic: 13.1502
P-value: 1.6599902021623374e-29

Conclusion: The p-value is less than alpha.
We reject the Null Hypothesis.
There is a statistically significant difference in the mean fares between Pclass 1 and Pclass 3.
