## 1. Introduction to Python Basics
*	Topics Covered:
    *	Data types
    *   Indexing and slicing
    *   Loading data from CSV and Excel files
    *   Working with data
    *   Basic plotting

In [None]:
# Install the required packages for the project.
# These commands are commented out; uncomment them only if the packages
# are not already installed in the environment running this notebook.
#!pip install pandas
#!pip install numpy
#!pip install matplotlib

In [None]:
# Show a greeting on screen to demonstrate a basic Python function call
greeting = "Welcome to Python!"
print(greeting)

## 2. Basic Data Types in Python

*	Topics Covered:
    *	Strings (str)
    *	Integers (int)
    *	Floats (float)
    *	Lists (list)


In [None]:
# One example value for each basic data type
# (string, integer, float) assigned in a single statement
name, age, height = "Alice", 30, 5.6
# A list holds an ordered collection of values
items = ["apple", "banana", "cherry"]

# Print each value next to a descriptive label
labelled_values = (
    ("Name:", name),
    ("Age:", age),
    ("Height:", height),
    ("Shopping List:", items),
)
for label, value in labelled_values:
    print(label, value)

In [None]:
# Arithmetic operators applied to two integers
a, b = 10, 3

# Each entry pairs a label with the result of one operator
operations = [
    ('Sum:', a + b),
    ('Difference:', a - b),
    ('Product:', a * b),
    ('Division:', a / b),          # true division always returns a float
    ('Floor Division:', a // b),   # division rounded down to an integer
    ('Modulus:', a % b),           # remainder of the division
    ('Exponentiation:', a ** b),   # a raised to the power of b
]
for label, result in operations:
    print(label, result)

In [None]:
# Common string operations: joining, changing case, and splitting
first_name, last_name = "John", "Doe"

# Join the two parts with a single space between them
full_name = " ".join([first_name, last_name])

print('Full Name:', full_name)
print('Uppercase:', full_name.upper())
print('Lowercase:', full_name.lower())
# split() without arguments breaks the string on whitespace
print('Split Name:', full_name.split())

In [None]:
# Common list operations: appending, indexing, and sorting
numbers = list(range(1, 7))  # the integers 1 through 6
print('Original List:', numbers)

# append() adds one element to the end of the list
numbers.append(7)
print('List after Append:', numbers)

# Indexing starts at 0; negative indices count from the end
print('Second Element:', numbers[1])
print('Last Element:', numbers[-1])

# sort() reorders the list in place; reverse=True sorts in descending order
numbers.sort(reverse=True)
print('List after Sorting:', numbers)

In [None]:
# Indexing and slicing: a slice [start:stop] includes start and excludes stop
print('First 3 elements:', numbers[0:3])
# A negative start counts from the end of the list
print('Last 3 elements:', numbers[-3:])
# Positions 1, 2 and 3, i.e. the 2nd through 4th elements
print('Elements from 2 to 4:', numbers[1:4])


# In this section we will start using external libraries. 

____

## 3. Loading and Exploring Data

*	Topics Covered:
    *	Reading a CSV file
    *	Exploring basic data statistics

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fix the random seed so the synthetic data is identical on every run
np.random.seed(42)

number_of_values = 200

# Mean of each simulated group; both groups use a standard deviation of 10
group_means = {"Control": 30, "Treatment": 60}

# Draw normally distributed samples per group (Control first, then Treatment)
data = {
    group: np.random.normal(loc=mean, scale=10, size=number_of_values)
    for group, mean in group_means.items()
}

# Assemble the samples into a DataFrame with one column per group
df = pd.DataFrame(data)

# Show the first 10 rows as a quick check of the generated data
print(df.head(10))

### To load a dataset from a CSV or Excel file
_____
```python
import pandas as pd
```

#### CSV file
```python
csv_file = 'path_to_file.csv'
```
```python
data = pd.read_csv(csv_file)
```

----

#### Excel file
```python
excel_file = 'path_to_file.xlsx'
```
```python
data = pd.read_excel(excel_file)
```
-----

In [None]:
# Mean and standard deviation of each measurement column
measurement_columns = ['Control', 'Treatment']
summary_stats = df[measurement_columns].agg(['mean', 'std'])
print(summary_stats)

In [None]:
# Uncomment the line below to display the IPython help for df.agg
#df.agg?

## 4. Data Indexing and Selection

*	Topics Covered:
    *	Selecting rows and columns
    *	Conditional selection


In [None]:
# Select the 'Control' column, then take its first five values by position
df['Control'].iloc[0:5]

In [None]:
# Select rows by index label; .loc slices include BOTH end labels (5 and 10)
df.loc[5:10, :]

In [None]:
# Adding columns to the dataframe and selecting rows by a condition

# Calculate the per-row difference between treatment and control
df['Difference'] = df['Treatment'] - df['Control']

# A treatment effect is treated as significant when the difference
# is larger than this threshold
threshold = 30

# Add a boolean column indicating a significant treatment effect
df['Significant'] = df['Difference'] > threshold

# Conditional selection: keep only the rows where 'Significant' is True,
# then display the first 10 of them
df[df['Significant']].head(10)


## 5. Data Visualization

*	Topics Covered:
    *	Creating a linear plot
    *	Generating a box plot
    *	Making a scatter plot


In [None]:
# Bar chart of the group means, with the standard deviation as error bars
labels = ['Control', 'Treatment']

# Look up each group's mean and standard deviation in the summary table
means = [summary_stats.loc['mean', group] for group in labels]
errors = [summary_stats.loc['std', group] for group in labels]

plt.figure(figsize=(4, 4))
plt.bar(labels, means, yerr=errors, capsize=10, color=['blue', 'red'])
plt.ylabel('Mean Values')
plt.title('Comparison of Mean Measurements')
plt.show()

In [None]:
# Box plot comparing the two groups, with dashed green whiskers
plt.figure(figsize=(4, 4))

groups = ['Control', 'Treatment']
# x-axis positions for the groups
positions = [1, 2]
# Fill colour for each box, in the same order as the groups
colors = ['blue', 'red']
whisker_style = {'linewidth': 1.5, 'linestyle': '--', 'color': 'green'}

# patch_artist=True draws the boxes as filled patches so they can be coloured
box = plt.boxplot(
    [df[group] for group in groups],
    tick_labels=groups,
    positions=positions,
    patch_artist=True,
    widths=0.6,
    whiskerprops=whisker_style,
)

# Apply the fill colour to each box
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)

plt.title('Box Plot with Whiskers')
plt.ylabel('Values')
plt.show()

In [None]:
# Box plot of both groups with the individual data points drawn on top
plt.figure(figsize=(4, 4))
# Positions for the groups
positions = [1, 2]

# Creating the box plot with 'patch_artist=True' to enable face coloring
box = plt.boxplot([df['Control'], df['Treatment']], tick_labels=['Control', 'Treatment'],
                  positions=positions, patch_artist=True, widths=0.6)

# Coloring the boxes
colors = ['blue', 'red']
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)

# Adding data points on top of the box plots.
# The loop variable is named 'values' (not 'data') so it does not overwrite
# the 'data' dictionary created in the data-generation cell.
for i, values in enumerate([df['Control'], df['Treatment']], start=1):
    # Add some jitter to the x-values to spread the points around position i
    x = np.random.normal(i, 0.04, size=len(values))
    plt.plot(x, values, 'o', markersize=2, alpha=0.6, color='k')  # 'k' stands for black

plt.title('Box Plot with Data Points')
plt.ylabel('Values')
plt.show()

In [None]:
# Side-by-side normalized histograms of the Control and Treatment measurements
plt.figure(figsize=(5, 4))
plt.hist([df['Control'], df['Treatment']], bins=10, alpha=0.7,
         label=['Control', 'Treatment'], color=['blue', 'red'], density=True)
plt.title('Histogram of Control and Treatment Measurements')
plt.xlabel('Value')
# density=True normalizes each histogram to unit area, so the y-axis shows
# probability density rather than raw counts
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Uncomment the line below to display the IPython help for plt.hist
#plt.hist?