In [2]:
import pandas as pd
import numpy as np
import boto3

# Seed NumPy's legacy global RNG so every draw in this cell is repeatable.
np.random.seed(42)

# Row count of the training split
n_samples = 20000

# ---- Feature draws ---------------------------------------------------------
# Every call below consumes the same seeded global stream, so the ORDER of
# these statements determines the generated values -- do not reorder them.
# Note that np.random.randint excludes its upper bound.
square_footage = np.random.randint(400, 3000, size=n_samples)   # 400..2999
num_rooms = np.random.randint(1, 6, size=n_samples)             # 1..5
num_bathrooms = np.random.randint(1, 4, size=n_samples)         # 1..3
has_balcony = np.random.randint(0, 2, size=n_samples)           # 0/1 flag
has_gym_access = np.random.randint(0, 2, size=n_samples)        # 0/1 flag
has_parking = np.random.randint(0, 2, size=n_samples)           # 0/1 flag
neighborhood_safety_index = np.random.uniform(1, 10, size=n_samples).round(2)
# NOTE(review): upper bound is exclusive, so WalkScore spans 0..99 -- confirm
# that 100 is intentionally left out.
walk_score = np.random.randint(0, 100, size=n_samples)
school_rating = np.random.uniform(1, 10, size=n_samples).round(1)

# Gaussian noise (mean 0, std 250); drawn after all features, which is the
# same point in the random stream where the rent formula consumes it.
noise = np.random.normal(0, 250, n_samples)

# ---- Target ----------------------------------------------------------------
# Rent is a fixed linear combination of the features plus the noise term.
# Terms are summed left to right in this exact order.
rent = (
    square_footage * 1.5
    + num_rooms * 150
    + num_bathrooms * 200
    + has_balcony * 100
    + has_gym_access * 75
    + has_parking * 50
    + neighborhood_safety_index * 100
    + walk_score * 10
    + school_rating * 50
    + noise
)

# ---- Assemble the table ----------------------------------------------------
# Dict insertion order defines the CSV column order.
column_data = {
    'SquareFootage': square_footage,
    'NumRooms': num_rooms,
    'NumBathrooms': num_bathrooms,
    'HasBalcony': has_balcony,
    'HasGymAccess': has_gym_access,
    'HasParking': has_parking,
    'NeighborhoodSafetyIndex': neighborhood_safety_index,
    'WalkScore': walk_score,
    'SchoolRating': school_rating,
    'Rent': rent.round(2),  # rent is stored with 2 decimal places
}
df = pd.DataFrame(column_data)

# ---- Persist locally, then push to S3 --------------------------------------
# The CSV is written to the current working directory without the index.
file_name = 'rental_pricing_dataset.csv'
df.to_csv(file_name, index=False)

bucket_name = 'apartment-pricing'  # replace with your S3 bucket name
s3 = boto3.client('s3')
s3_key = "TrainData/" + file_name  # training split lives under TrainData/
s3.upload_file(file_name, bucket_name, s3_key)

print(f"File uploaded to s3://{bucket_name}/TrainData/{file_name}")

File uploaded to s3://apartment-pricing/TrainData/rental_pricing_dataset.csv


In [3]:
import pandas as pd
import numpy as np
import boto3

# Seed NumPy's legacy global RNG so every draw in this cell is repeatable.
# The seed differs from the training cell's (42), giving a different sample.
np.random.seed(40)

# Row count of the validation split
n_samples = 5000

# ---- Feature draws ---------------------------------------------------------
# Every call below consumes the same seeded global stream, so the ORDER of
# these statements determines the generated values -- do not reorder them.
# Note that np.random.randint excludes its upper bound.
square_footage = np.random.randint(400, 3000, size=n_samples)   # 400..2999
num_rooms = np.random.randint(1, 6, size=n_samples)             # 1..5
num_bathrooms = np.random.randint(1, 4, size=n_samples)         # 1..3
has_balcony = np.random.randint(0, 2, size=n_samples)           # 0/1 flag
has_gym_access = np.random.randint(0, 2, size=n_samples)        # 0/1 flag
has_parking = np.random.randint(0, 2, size=n_samples)           # 0/1 flag
neighborhood_safety_index = np.random.uniform(1, 10, size=n_samples).round(2)
# NOTE(review): upper bound is exclusive, so WalkScore spans 0..99 -- confirm
# that 100 is intentionally left out.
walk_score = np.random.randint(0, 100, size=n_samples)
school_rating = np.random.uniform(1, 10, size=n_samples).round(1)

# Gaussian noise (mean 0, std 250); drawn after all features, which is the
# same point in the random stream where the rent formula consumes it.
noise = np.random.normal(0, 250, n_samples)

# ---- Target ----------------------------------------------------------------
# Rent is a fixed linear combination of the features plus the noise term.
# Terms are summed left to right in this exact order.
rent = (
    square_footage * 1.5
    + num_rooms * 150
    + num_bathrooms * 200
    + has_balcony * 100
    + has_gym_access * 75
    + has_parking * 50
    + neighborhood_safety_index * 100
    + walk_score * 10
    + school_rating * 50
    + noise
)

# ---- Assemble the table ----------------------------------------------------
# Dict insertion order defines the CSV column order.
column_data = {
    'SquareFootage': square_footage,
    'NumRooms': num_rooms,
    'NumBathrooms': num_bathrooms,
    'HasBalcony': has_balcony,
    'HasGymAccess': has_gym_access,
    'HasParking': has_parking,
    'NeighborhoodSafetyIndex': neighborhood_safety_index,
    'WalkScore': walk_score,
    'SchoolRating': school_rating,
    'Rent': rent.round(2),  # rent is stored with 2 decimal places
}
df = pd.DataFrame(column_data)

# ---- Persist locally, then push to S3 --------------------------------------
# NOTE(review): this is the same local file name the training cell writes, so
# the local training CSV is overwritten here; only the S3 key prefix keeps the
# two splits apart.
file_name = 'rental_pricing_dataset.csv'
df.to_csv(file_name, index=False)

bucket_name = 'apartment-pricing'  # replace with your S3 bucket name
s3 = boto3.client('s3')
s3_key = "ValidData/" + file_name  # validation split lives under ValidData/
s3.upload_file(file_name, bucket_name, s3_key)

print(f"File uploaded to s3://{bucket_name}/ValidData/{file_name}")

File uploaded to s3://apartment-pricing/ValidData/rental_pricing_dataset.csv


In [21]:
# One-time setup: clone the `main` branch of the AWS SageMaker examples repo
# into the working directory. The recorded output below shows the command was
# run once; it is presumably left commented out so a re-run does not attempt
# to clone into an existing directory -- confirm before removing the cell.
# ! git clone --branch main https://github.com/aws/amazon-sagemaker-examples.git

Cloning into 'amazon-sagemaker-examples'...
remote: Enumerating objects: 35689, done.[K
remote: Counting objects: 100% (1050/1050), done.[K
remote: Compressing objects: 100% (683/683), done.[K
remote: Total 35689 (delta 420), reused 853 (delta 343), pack-reused 34639 (from 1)[K
Receiving objects: 100% (35689/35689), 637.69 MiB | 30.89 MiB/s, done.
Resolving deltas: 100% (20431/20431), done.
Updating files: 100% (2977/2977), done.


In [2]:
# One-time setup: clone the `master` branch of the AmazonSageMakerCourse repo
# into the working directory. The recorded output below shows the command was
# run once; it is presumably left commented out so a re-run does not attempt
# to clone into an existing directory -- confirm before removing the cell.
# ! git clone --branch master https://github.com/ChandraLingam/AmazonSageMakerCourse.git

Cloning into 'AmazonSageMakerCourse'...
remote: Enumerating objects: 949, done.[K
remote: Counting objects: 100% (289/289), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 949 (delta 250), reused 227 (delta 220), pack-reused 660 (from 1)[K
Receiving objects: 100% (949/949), 67.10 MiB | 33.44 MiB/s, done.
Resolving deltas: 100% (524/524), done.
