In [29]:
import os
import sys
import django
import pandas as pd
from django.db import transaction
from django.core.management.base import BaseCommand



In [3]:
# Put the directory two levels above the current working directory at the
# front of the import search path so project packages can be imported.
path = os.path.split(os.path.split(os.getcwd())[0])[0]
sys.path[:0] = [path]

In [6]:
from a_sourceCode.i_web_scraper import *
from a_sourceCode.ii_stage_data import *
from i_app.app.settings import local 

In [7]:
# DJANGO_SETTINGS_MODULE must be a dotted Python module path, not a file name:
# a trailing ".py" would make Django try to import a submodule called "py".
# setdefault() leaves any value already present in the environment untouched
# and returns whichever value ends up in effect.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "i_app.app.settings.local")

'app.settings.local'

In [10]:
# Initialise Django using the settings named by DJANGO_SETTINGS_MODULE; this
# has to run before the model import in the following cell.
django.setup()

In [26]:
from starbuck.models import *

In [30]:
class Command(BaseCommand):
    """Management command that imports processed Starbucks reviews from a CSV.

    Every usable CSV row results in one call to
    ``ProductScrapeEvent.objects.create_scrape_event``. Rows without review
    content, without a non-zero rating, or without a business address are
    skipped with a warning. Rows that raise are reported and the import
    carries on with the next row.
    """

    help = 'Import Starbucks reviews from processed CSV file'

    def add_arguments(self, parser):
        """Register the positional ``file_path`` argument on ``parser``."""
        parser.add_argument('file_path', type=str, help='Path to the processed CSV file')

    def handle(self, *args, **options):
        """Load the CSV at ``options['file_path']``, newest first, and import it."""
        df = pd.read_csv(options['file_path'])
        df = df.sort_values(by="review_date", ascending=False)

        self.import_data(df)

    @staticmethod
    def _is_missing(value):
        """Return True when ``value`` is None, NaN/NaT, or otherwise falsy.

        pandas represents empty CSV cells as NaN, which is truthy, so a plain
        truthiness test would treat a missing cell as present.
        """
        if value is None:
            return True
        if isinstance(value, str):
            return not value
        return bool(pd.isna(value)) or not value

    def _prepare_row(self, index, row):
        """Validate one row and build the arguments for ``create_scrape_event``.

        Args:
            index: The row's DataFrame index label (used only in messages).
            row: The row as yielded by ``DataFrame.iterrows()``.

        Returns:
            ``(review_data, address)`` for a usable row, or ``None`` when the
            row was skipped (a warning has already been written).

        Raises:
            ValueError: If the rating is present but not numeric.
        """
        raw_rating = row.get("review_rating", 0)
        # int(float(NaN)) raises, so map a blank rating to 0 and let the
        # required-field check below reject the row.
        rating = 0 if self._is_missing(raw_rating) else int(float(raw_rating))

        # NOTE(review): optional fields are passed through unchanged, so an
        # empty author/date/category cell still reaches the manager as NaN.
        review_data = {
            "review_content": row.get("review_content"),
            "review_rating": rating,
            "review_author": row.get("review_author"),
            "review_date": row.get("review_date"),
            "category_ratings": row.get("category_ratings", ""),
        }

        if self._is_missing(review_data["review_content"]) or not rating:
            self.stdout.write(self.style.WARNING(
                f"Skipping row {index}: Missing required fields"
            ))
            return None

        address = row.get("business_address")
        if self._is_missing(address):
            self.stdout.write(self.style.WARNING(
                f"Skipping row {index}: Missing address"
            ))
            return None

        return review_data, address

    def import_data(self, df):
        """Create one scrape event per valid row of ``df`` and report totals.

        The whole import runs in a single outer transaction. Each row's
        database work is wrapped in its own nested ``atomic()`` block (a
        savepoint) so that a failing row is rolled back cleanly and the outer
        transaction stays usable for the remaining rows.

        Args:
            df: DataFrame with the review columns read by ``_prepare_row``.
        """
        total_rows = len(df)
        success_count = 0

        with transaction.atomic():
            # `position` counts rows in iteration order; `index` is the frame's
            # label, which is arbitrary once the frame has been sorted.
            for position, (index, row) in enumerate(df.iterrows(), start=1):
                try:
                    prepared = self._prepare_row(index, row)
                    if prepared is not None:
                        review_data, address = prepared
                        # Savepoint: the exception leaves this inner block
                        # before it is caught, so the rollback is handled
                        # by Django rather than swallowed mid-transaction.
                        with transaction.atomic():
                            result = ProductScrapeEvent.objects.create_scrape_event(
                                data=[review_data],
                                address=address,
                            )
                        if result:
                            success_count += 1
                except Exception as e:
                    # Best-effort import: report the row and keep going.
                    self.stdout.write(self.style.ERROR(
                        f"Error on row {index}: {str(e)}"
                    ))

                # Progress feedback, emitted for skipped and failed rows too.
                if position % 100 == 0:
                    self.stdout.write(f"Processed {position}/{total_rows} rows...")

        # Final report
        self.stdout.write(self.style.SUCCESS(
            f"Import complete! Successfully imported {success_count}/{total_rows} reviews"
        ))


In [11]:
# Location of the processed reviews CSV read by the next cell.
# NOTE(review): this is an absolute, machine-specific path, and it rebinds
# `path`, which the sys.path bootstrap cell used for the project root —
# consider a distinct, configurable name.
path = "/Users/ericklopez/desktop/django_gun/empirical/data/processed/starbucks_location_processed.csv"

In [14]:
# Load the processed reviews CSV into a DataFrame. The columns output further
# down shows an 'Unnamed: 0' column, i.e. the file carries a saved index that
# is read back as an ordinary column here.
df = pd.read_csv(path)

In [22]:
# Order the reviews newest-first. The result is reassigned instead of sorting
# in place, so the cell does not mutate the frame loaded above.
# NOTE(review): read_csv was called without date parsing, so review_date is
# compared as loaded; if it is text, this ordering is chronological only for
# ISO-8601 style dates — confirm the format.
df = df.sort_values(by="review_date", ascending=False)

In [28]:
# Display the column labels of the loaded reviews frame.
df.columns

Index(['Unnamed: 0', 'business_address', 'review_author', 'review_date',
       'review_rating', 'review_content', 'category_ratings'],
      dtype='object')