In [0]:
%sql
-- Create the volume that later cells write the downloaded CSV into; IF NOT EXISTS makes re-running this cell a no-op.
CREATE VOLUME IF NOT EXISTS workspace.default.usage_metrics;

# 🚀 Project Title: Sample Data Engineering Task

## 📋 Task Description
This notebook demonstrates how to **prepare, clean, and save data** into a Delta table.  
We will focus on ensuring schema compatibility, handling invalid column names, and applying best practices for PySpark workflows.  

---

## 👩‍💻 Author
<span style="color:blue">**Pakkir_Fathima**</span>

---

## 🖼️ Our Team
![Team Photo](https://fpimages.withfloats.com/actual/6929d1ac956d0a744b5c9822.jpeg)

---

### ✨ Notes
- Use **bold** for emphasis.  
- Use *italics* for highlighting important terms.  
- Headings (`#`, `##`, `###`) organize the notebook clearly.  

In [0]:
# COMMAND ----------
## 💻 Step 1: Install and Import the necessary libraries

# In most modern Databricks runtimes, 'requests' is already available.
# If not, you would run a command like:
# %pip install requests
import requests
print("Libraries imported successfully.")

# COMMAND ----------
## 🚀 Step 2: Define URL and Fetch Data using requests

# Source URL of the sample CSV.
data_url = "https://public.tableau.com/app/sample-data/mobile_os_usage.csv"

# Download the file. requests applies no timeout unless one is given, so an
# unresponsive server would hang this cell indefinitely; bound the connect
# and read phases explicitly (seconds). A timeout surfaces as a
# requests.exceptions.Timeout raised from this call.
response = requests.get(data_url, timeout=(10, 60))

# Anything other than HTTP 200 is treated as a failed download: report the
# status code and stop the notebook here so later cells never see bad data.
if response.status_code != 200:
    print(f"Error fetching data. Status Code: {response.status_code}")
    raise Exception("Failed to fetch data.")

print(f"Successfully fetched data from: {data_url}")


# COMMAND ----------
## 💾 Step 3: Write the Data to the Volume using dbutils.fs.put

# Target file inside the volume.
volume_path = "/Volumes/workspace/default/usage_metrics/mobile_os_usage.csv"

# NOTE(review): response.text is decoded using the encoding requests infers
# from the HTTP headers; if the CSV can contain non-ASCII characters, confirm
# the server declares its charset so the text is not mis-decoded.
try:
    # `response` is the HTTP response object created by the fetch step.
    dbutils.fs.put(volume_path, response.text, overwrite=True)
    print(f"Data successfully written to Volume at: {volume_path}")
except Exception as e:
    # Report the failure, then re-raise the active exception unchanged so the
    # cell is marked as failed and the original traceback is preserved.
    print(f"Error writing data to Volume: {e}")
    raise

# COMMAND ----------
## ✅ Step 4: Verify the file was written (Optional)
# List the contents of the volume directory so the new file can be confirmed by eye.
print("\nVerifying file creation:")
dbutils.fs.ls("/Volumes/workspace/default/usage_metrics/")

# COMMAND ----------

In [0]:
%run
"/Workspace/Users/pakkirmohamad25@gmail.com/databricks-code-repo-irfan/databricks_workouts_2025/1_DATABRICKS_NOTEBOOK_FUNDAMENTALS/4_child_notebook"

In [0]:
%sh
# List the volume directory, then preview the first lines of the downloaded CSV.
VOLUME_DIR=/Volumes/workspace/default/usage_metrics
ls "$VOLUME_DIR"
head "$VOLUME_DIR/mobile_os_usage.csv"

In [0]:
# Load the CSV stored in the volume into a PySpark DataFrame; the "header"
# option makes the first line of the file supply the column names.
df1 = (
    spark.read
    .option("header", "true")
    .csv("/Volumes/workspace/default/usage_metrics/mobile_os_usage.csv")
)

# Preview the leading rows.
df1.show()

In [0]:
# Re-read the CSV from the volume and, in the same chain, rename the two
# headers that contain spaces so the column names use underscores instead.
df1 = (
    spark.read
    .option("header", "true")
    .csv("/Volumes/workspace/default/usage_metrics/mobile_os_usage.csv")
    .withColumnRenamed("Mobile Operating System", "Mobile_Operating_System")
    .withColumnRenamed("Percent of Usage", "Percent_of_Usage")
)

# Preview the renamed DataFrame.
df1.show()

# Save the DataFrame as a table; "overwrite" replaces any existing contents.
(
    df1.write
    .mode("overwrite")
    .saveAsTable("default.mobile_os_usage")
)

In [0]:
%sql
-- Query the table by the same schema-qualified name it was saved under, so the
-- result does not depend on which schema is current for the session.
SELECT * FROM default.mobile_os_usage;

In [0]:

## 💻 Step 1: Define the Python Function

def convert_to_uppercase(input_string):
  """
  Return an upper-cased version of ``input_string``.

  A string is upper-cased directly. Any other value is first converted to
  text with ``str()`` and that text is upper-cased, so non-string input
  does not raise.
  """
  text = input_string if isinstance(input_string, str) else str(input_string)
  return text.upper()



## 🚀 Step 2: Call the Function and Print the Results

# First sample: convert it and show the before/after pair.
text_1 = "This is a test sentence."
result_1 = convert_to_uppercase(text_1)
print(f"Original 1: '{text_1}'")
print(f"Uppercase 1: '{result_1}'")

# Visual separator between the two samples.
print("-" * 20)

# Second sample: convert it and show the before/after pair.
text_2 = "PySpark is fun."
result_2 = convert_to_uppercase(text_2)
print(f"Original 2: '{text_2}'")
print(f"Uppercase 2: '{result_2}'")



In [0]:
%pip install pandas

In [0]:
# COMMAND ----------
## 💻 Step 1: Import Pandas and define file path

# Ensure pandas is installed if necessary (usually pre-installed in Databricks)
# %pip install pandas

import pandas as pd
print("Pandas imported successfully.")

# Path of the CSV to load; replace it with your own file in DBFS or a Volume.
# This is the file written to the volume earlier in the notebook.
file_path = "/Volumes/workspace/default/usage_metrics/mobile_os_usage.csv"


## 🚀 Step 2: Read CSV and Display Output

# Only the read itself is guarded: the file-path hint below is meaningful for
# read failures, not for errors raised while displaying the data.
try:
    df_pandas = pd.read_csv(file_path)
except Exception as e:
    print(f"An error occurred: {e}")
    print("Please ensure the file path is correct and the file exists.")
    # Re-raise so the cell is marked as failed instead of finishing as if the
    # read had succeeded (same print-then-raise pattern as the volume write).
    raise

# display() renders the pandas DataFrame as a rich table in the cell output.
print("--- First 5 Rows of Data ---")
display(df_pandas.head())

# info() prints the column names, dtypes and non-null counts.
print("\n--- DataFrame Information ---")
df_pandas.info()

# COMMAND ----------

In [0]:
%sh
# Print a completion marker to the cell output.
echo "Magic commands tasks completed"
