# Simulation and Analysis of Smartphone Data

## Libraries and settings

In [None]:
# Libraries
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt
from IPython.display import Image

# Ignore warnings
# Installs a blanket "ignore" filter, so every warning raised from here on is
# suppressed. This keeps the notebook output clean but also hides
# deprecation notices from the plotting and statistics libraries.
import warnings
warnings.filterwarnings("ignore")

# Show current working directory
# (useful for checking where relative file paths used in this notebook point to)
print(os.getcwd())

In [None]:
# Display the picture "smartphones.jpg" with a width of 500
# (the file name is given as a relative path)
Image("smartphones.jpg", width=500)

## Simulation of smartphone data

In [None]:
# Define the possible brands and the phone model offered by each brand.
# The two lists are aligned by position: types[i] is the model of brands[i].
brands = ['Apple', 'Samsung', 'Google']
types = ['iPhone 13', 'Galaxy S23', 'Google Pixel 8']

# Map every brand to its own model so that a simulated row can never pair a
# brand with another manufacturer's phone (e.g. 'Apple' with 'Galaxy S23').
brand_types = dict(zip(brands, types))

# Set parameters for the normal distribution of prices for each brand
brand_parameters = {
    'Apple': {'mean': 900, 'std': 200},
    'Samsung': {'mean': 800, 'std': 150},
    'Google': {'mean': 700, 'std': 120}
}

# Number of entries
p_n = 10000

# Seed both random number generators so the simulation is reproducible
random.seed(42)  # random module: used for the brand draw
np.random.seed(42)  # NumPy: used for the price draw

# Generate random data: one [brand, type, price] row per entry
data = []

for _ in range(p_n):
    brand = random.choice(brands)
    # The model follows from the brand; it is not drawn independently
    phone_type = brand_types[brand]
    price_mean = brand_parameters[brand]['mean']
    price_std = brand_parameters[brand]['std']
    # int() truncates the drawn value to a whole-number price
    price = int(np.random.normal(loc=price_mean, scale=price_std))
    data.append([brand, phone_type, price])

# Create a DataFrame
df = pd.DataFrame(data, columns=['Brand', 'Type', 'Price'])
df.head()

## Grouped boxplots showing how smartphone prices vary per brand

In [None]:
# Create a horizontal grouped boxplot using Seaborn:
# prices run along the x-axis, with one box per brand on the y-axis
plt.figure(figsize=(8,2))
sns.boxplot(x='Price',
            y='Brand',
            data=df,
            palette='Set2',
            orient='h')

# Set title and axis labels after plotting, and label each axis with the
# variable it actually shows (x = Price, y = Brand)
plt.title('Smartphone prices by brand')
plt.xlabel('Price')
plt.ylabel('Brand')
plt.show()

## Analysis of Variance (ANOVA)

In [None]:
# Hypotheses:
# H0: There are no price differences between groups (smartphone brands)
# H1: There are price differences between groups (smartphone brands)

# Significance level alpha
alpha = 0.05

# Create ANOVA: one sample of prices per brand. Grouping by the 'Brand' column
# (instead of listing brand names by hand) keeps the test in sync with
# whichever brands the data contains.
price_groups = [prices for _, prices in df.groupby('Brand')['Price']]
fvalue, pvalue = stats.f_oneway(*price_groups)

# Print results. The p-value uses a general number format so that a small
# but non-zero value is not rounded to a plain 0.0.
print(f'F-value: {fvalue:.3f} p-value: {pvalue:.4g}')

# Interpretation: reject H0 when the p-value is below alpha
print('\nInterpretation:')
if pvalue < alpha:
    print(f"The differences in smartphone prices among brands are statistically significant at the {alpha} significance level.")
else:
    print(f"There is no statistically significant difference in smartphone prices among brands at the {alpha} significance level.")

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: print basic information about the environment the notebook ran in
# (OS family, platform name and release, current timestamp, Python version),
# framed by two separator lines.
footer_rule = '-----------------------------------'
run_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(footer_rule)
print(os.name.upper())
print(f"{platform.system()} | {platform.release()}")
print(f"Datetime: {run_timestamp}")
print(f"Python Version: {python_version()}")
print(footer_rule)