In [10]:
from typing import List, Dict, Callable
from datetime import datetime

class AlbumReviewData:
    def __init__(self, data: List[Dict]):
        self.data = data

    def filter_by_critics_count(self, min_critics: int = None, max_critics: int = None) -> List[Dict]:
        """Filter albums by the number of critics that rated them."""
        filtered_data = []
        for album in self.data:
            critics_count = sum(1 for score in album.values() if isinstance(score, (int, float)))
            if (min_critics is None or critics_count >= min_critics) and (max_critics is None or critics_count <= max_critics):
                filtered_data.append(album)
        return filtered_data

    def filter_by_metascore_range(self, min_score: int = None, max_score: int = None) -> List[Dict]:
        """Filter albums by their Metascore range."""
        filtered_data = []
        for album in self.data:
            metascore = album.get("metascore")
            if metascore is not None and (min_score is None or metascore >= min_score) and (max_score is None or metascore <= max_score):
                filtered_data.append(album)
        return filtered_data

    def filter_by_date_range(self, start_date: str = None, end_date: str = None) -> List[Dict]:
        """Filter albums by their release date range."""
        filtered_data = []
        for album in self.data:
            release_date = album.get("date")
            if release_date is not None:
                release_date = datetime.strptime(release_date, "%Y-%m-%d")
                if (start_date is None or release_date >= datetime.strptime(start_date, "%Y-%m-%d")) and (end_date is None or release_date <= datetime.strptime(end_date, "%Y-%m-%d")):
                    filtered_data.append(album)
        return filtered_data

    def filter_by_userscore_range(self, min_score: float = None, max_score: float = None) -> List[Dict]:
        """Filter albums by their user score range."""
        filtered_data = []
        for album in self.data:
            userscore = album.get("userscore")
            if userscore is not None and (min_score is None or userscore >= min_score) and (max_score is None or userscore <= max_score):
                filtered_data.append(album)
        return filtered_data

    def apply_filter(self, filter_func: Callable[[Dict], bool]) -> List[Dict]:
        """Apply a custom filter function to the data."""
        return [album for album in self.data if filter_func(album)]

# Example usage
data = [
    # ... (your data here)
]

album_data = AlbumReviewData(data)

# Filter by at least 5 critics and Metascore between 70 and 90
filtered_data = album_data.filter_by_critics_count(min_critics=5)
filtered_data = album_data.filter_by_metascore_range(min_score=70, max_score=90)

# Filter by release date in 2005
filtered_data = album_data.filter_by_date_range(start_date="2005-01-01", end_date="2005-12-31")

# Filter by user score greater than 8.0
filtered_data = album_data.filter_by_userscore_range(min_score=8.0)

# Apply a custom filter function
def has_critic_review(album: Dict) -> bool:
    return any(isinstance(score, (int, float)) for score in album.values())

filtered_data = album_data.apply_filter(has_critic_review)


In [20]:
import pandas as pd
from datetime import datetime

# Load data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Print column names to verify
print(df.columns)

# Filter by the number of critics
def filter_by_critics_count(df, min_critics=None, max_critics=None):
    critics_count = df.iloc[:, 2:12].notna().sum(axis=1)  # Count non-null values in the critic score columns
    mask = (critics_count >= min_critics) if min_critics else pd.Series([True] * len(df))
    mask &= (critics_count <= max_critics) if max_critics else pd.Series([True] * len(df))
    return df[mask]

# Filter by Metascore range
def filter_by_metascore_range(df, min_score=None, max_score=None):
    mask = (df['metascore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['metascore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Filter by date range
def filter_by_date_range(df, start_date=None, end_date=None):
    df['date'] = pd.to_datetime(df['date'])
    mask = (df['date'] >= pd.to_datetime(start_date, format='%Y-%m-%d')) if start_date else pd.Series([True] * len(df))
    mask &= (df['date'] <= pd.to_datetime(end_date, format='%Y-%m-%d')) if end_date else pd.Series([True] * len(df))
    return df[mask]

# Filter by user score range
def filter_by_userscore_range(df, min_score=None, max_score=None):
    df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')
    mask = (df['userscore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['userscore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Apply a custom filter function
def apply_filter(df, filter_func):
    return df[df.apply(filter_func, axis=1)]

# Example usage
filtered_df = filter_by_critics_count(df, min_critics=5)
filtered_df = filter_by_metascore_range(filtered_df, min_score=70, max_score=90)
filtered_df = filter_by_date_range(filtered_df, start_date='2005-01-01', end_date='2005-12-31')
filtered_df = filter_by_userscore_range(filtered_df, min_score=8.0)

# Apply a custom filter function
def has_critic_review(row):
    return any(isinstance(score, (int, float)) for score in row[2:12])

filtered_df = apply_filter(filtered_df, has_critic_review)

# Ensure the critic score columns are numeric
critic_score_columns = df.columns[2:12]
filtered_df[critic_score_columns] = filtered_df[critic_score_columns].apply(pd.to_numeric, errors='coerce')

# Calculate number of critic reviews and average critic score correctly
filtered_df['num_critic_reviews'] = filtered_df[critic_score_columns].notna().sum(axis=1)
filtered_df['avg_critic_score'] = filtered_df[critic_score_columns].mean(axis=1, skipna=True)

# Sort by number of critic reviews and average critic score
filtered_df = filtered_df.sort_values(by=['num_critic_reviews', 'avg_critic_score'], ascending=[False, False])

# Verify the correct column names
print(filtered_df.columns)

# Select and display the top albums based on the criteria
top_albums = filtered_df[['recordname', 'recordauthor', 'metascore', 'userscore', 'num_critic_reviews', 'avg_critic_score']]
print(top_albums.head())


Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date'],
      dtype='object')
Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date',
       'num_critic_reviews', 'avg_critic_score'],
      dtype='object')
     recordname  recordauthor  metascore  userscore  num_critic_reviews  \
1066   12 Songs           NaN         81        8.3                   4   
1067   12 Songs           NaN         81        8.3                   4   
1068   12 Songs           NaN         81        8.3                   4   
1069   12 Songs           NaN         81        8.3                   4   
1070   12 Songs           NaN         81        8.3                   4   

      avg_critic_score  
1066      2.828520e+17  
1067      2.828520e+17  
1068      2.828520e+17  
1069      2.828520e+17  
1070      2.828520e+17  


In [23]:
import pandas as pd
from datetime import datetime

# Load data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Print column names to verify
print(df.columns)

# Filter by the number of critics
def filter_by_critics_count(df, min_critics=None, max_critics=None):
    critics_count = df.iloc[:, 2:12].notna().sum(axis=1)  # Count non-null values in the critic score columns
    mask = (critics_count >= min_critics) if min_critics else pd.Series([True] * len(df))
    mask &= (critics_count <= max_critics) if max_critics else pd.Series([True] * len(df))
    return df[mask]

# Filter by Metascore range
def filter_by_metascore_range(df, min_score=None, max_score=None):
    mask = (df['metascore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['metascore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Filter by date range
def filter_by_date_range(df, start_date=None, end_date=None):
    df['date'] = pd.to_datetime(df['date'])
    mask = (df['date'] >= pd.to_datetime(start_date, format='%Y-%m-%d')) if start_date else pd.Series([True] * len(df))
    mask &= (df['date'] <= pd.to_datetime(end_date, format='%Y-%m-%d')) if end_date else pd.Series([True] * len(df))
    return df[mask]

# Filter by user score range
def filter_by_userscore_range(df, min_score=None, max_score=None):
    df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')
    mask = (df['userscore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['userscore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Apply a custom filter function
def apply_filter(df, filter_func):
    return df[df.apply(filter_func, axis=1)]

# Example usage
filtered_df = filter_by_critics_count(df, min_critics=5)
filtered_df = filter_by_metascore_range(filtered_df, min_score=70, max_score=90)
filtered_df = filter_by_date_range(filtered_df, start_date='2005-01-01', end_date='2005-12-31')
filtered_df = filter_by_userscore_range(filtered_df, min_score=8.0)

# Apply a custom filter function
def has_critic_review(row):
    return any(isinstance(score, (int, float)) for score in row[2:12])

filtered_df = apply_filter(filtered_df, has_critic_review)

# Ensure the critic score columns are numeric
critic_score_columns = df.columns[2:12]
filtered_df[critic_score_columns] = filtered_df[critic_score_columns].apply(pd.to_numeric, errors='coerce')

# Calculate number of critic reviews and average critic score correctly
filtered_df['num_critic_reviews'] = filtered_df[critic_score_columns].notna().sum(axis=1)
filtered_df['avg_critic_score'] = filtered_df[critic_score_columns].mean(axis=1, skipna=True)

# Remove rows where average critic score calculation may be incorrect
filtered_df = filtered_df[filtered_df['avg_critic_score'].notna()]

# Sort by number of critic reviews and average critic score
filtered_df = filtered_df.sort_values(by=['num_critic_reviews', 'avg_critic_score'], ascending=[False, False])

# Verify the correct column names
print(filtered_df.columns)

# Select and display the top albums based on the criteria
top_albums = filtered_df[['recordname', 'recordauthor', 'metascore', 'userscore', 'num_critic_reviews', 'avg_critic_score']]
print(top_albums.head())


Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date'],
      dtype='object')
Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date',
       'num_critic_reviews', 'avg_critic_score'],
      dtype='object')
     recordname  recordauthor  metascore  userscore  num_critic_reviews  \
1066   12 Songs           NaN         81        8.3                   4   
1067   12 Songs           NaN         81        8.3                   4   
1068   12 Songs           NaN         81        8.3                   4   
1069   12 Songs           NaN         81        8.3                   4   
1070   12 Songs           NaN         81        8.3                   4   

      avg_critic_score  
1066      2.828520e+17  
1067      2.828520e+17  
1068      2.828520e+17  
1069      2.828520e+17  
1070      2.828520e+17  


In [19]:
import pandas as pd
from datetime import datetime

# Load data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Print column names to verify
print(df.columns)

# Filter by the number of critics
def filter_by_critics_count(df, min_critics=None, max_critics=None):
    critics_count = df.iloc[:, 2:12].notna().sum(axis=1)  # Count non-null values in the critic score columns
    mask = (critics_count >= min_critics) if min_critics else pd.Series([True] * len(df))
    mask &= (critics_count <= max_critics) if max_critics else pd.Series([True] * len(df))
    return df[mask]

# Filter by Metascore range
def filter_by_metascore_range(df, min_score=None, max_score=None):
    mask = (df['metascore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['metascore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Filter by date range
def filter_by_date_range(df, start_date=None, end_date=None):
    df['date'] = pd.to_datetime(df['date'])
    mask = (df['date'] >= pd.to_datetime(start_date, format='%Y-%m-%d')) if start_date else pd.Series([True] * len(df))
    mask &= (df['date'] <= pd.to_datetime(end_date, format='%Y-%m-%d')) if end_date else pd.Series([True] * len(df))
    return df[mask]

# Filter by user score range
def filter_by_userscore_range(df, min_score=None, max_score=None):
    df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')
    mask = (df['userscore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['userscore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Apply a custom filter function
def apply_filter(df, filter_func):
    return df[df.apply(filter_func, axis=1)]

# Example usage
filtered_df = filter_by_critics_count(df, min_critics=5)
filtered_df = filter_by_metascore_range(filtered_df, min_score=70, max_score=90)
filtered_df = filter_by_date_range(filtered_df, start_date='2005-01-01', end_date='2005-12-31')
filtered_df = filter_by_userscore_range(filtered_df, min_score=8.0)

# Apply a custom filter function
def has_critic_review(row):
    return any(isinstance(score, (int, float)) for score in row[2:12])

filtered_df = apply_filter(filtered_df, has_critic_review)

# Ensure the critic score columns are numeric
critic_score_columns = df.columns[2:12]
filtered_df[critic_score_columns] = filtered_df[critic_score_columns].apply(pd.to_numeric, errors='coerce')

# Calculate number of critic reviews and average critic score
filtered_df['num_critic_reviews'] = filtered_df[critic_score_columns].notna().sum(axis=1)
filtered_df['avg_critic_score'] = filtered_df[critic_score_columns].mean(axis=1, skipna=True)

# Sort by number of critic reviews and average critic score
filtered_df = filtered_df.sort_values(by=['num_critic_reviews', 'avg_critic_score'], ascending=[False, False])

# Verify the correct column names
print(filtered_df.columns)

# Adjust column names based on the actual DataFrame
top_albums = filtered_df[['recordname', 'recordauthor', 'metascore', 'userscore', 'num_critic_reviews', 'avg_critic_score']]
print(top_albums.head())


Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date'],
      dtype='object')
Index(['Unnamed: 0', 'recordname', 'recordauthor', 'critic', 'score', 'brief',
       'link', 'metascore', 'description', 'userscore', 'date',
       'num_critic_reviews', 'avg_critic_score'],
      dtype='object')
     recordname  recordauthor  metascore  userscore  num_critic_reviews  \
1066   12 Songs           NaN         81        8.3                   4   
1067   12 Songs           NaN         81        8.3                   4   
1068   12 Songs           NaN         81        8.3                   4   
1069   12 Songs           NaN         81        8.3                   4   
1070   12 Songs           NaN         81        8.3                   4   

      avg_critic_score  
1066      2.828520e+17  
1067      2.828520e+17  
1068      2.828520e+17  
1069      2.828520e+17  
1070      2.828520e+17  


In [30]:
import pandas as pd

# Load the data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Convert scores to numeric, coercing errors to handle any non-numeric values
df['score'] = pd.to_numeric(df['score'], errors='coerce')
df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')

# Group by 'recordname' and calculate the average critic and user scores
average_scores = df.groupby('recordname').agg({
    'score': 'mean',
    'userscore': 'mean',
    'recordname': 'size'
}).rename(columns={'recordname': 'rating_count'}).reset_index()

# Filter albums with more than 15 critic ratings
more_than_15_ratings = average_scores[average_scores['rating_count'] > 15]

# Sort by average score
highest_rated_albums = more_than_15_ratings.sort_values(by=['score', 'userscore'], ascending=False)

print(highest_rated_albums)


                    recordname      score  userscore  rating_count
8489                     SMiLE  92.896552        7.9            29
3387    Fetch the Bolt Cutters  92.821429        8.1            28
11087      To Pimp A Butterfly  92.636364        8.9            44
3863                  Ghosteen  92.607143        8.6            28
11562            Van Lear Rose  92.074074        8.4            27
...                        ...        ...        ...           ...
10835            The Weirdness  43.843750        5.7            32
6156                      Lulu  42.258065        2.3            31
11460  United Nations of Sound  40.437500        6.4            16
5928                 Liz Phair  39.571429        6.1            21
3299        Famous First Words  32.882353        2.5            17

[4393 rows x 4 columns]


In [36]:
import pandas as pd

# Load the data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Convert scores to numeric, coercing errors to handle any non-numeric values
df['score'] = pd.to_numeric(df['score'], errors='coerce')
df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')

# Group by 'recordname' and calculate the average critic and user scores
average_scores = df.groupby('recordname').agg({
    'score': 'mean',
    'userscore': 'mean',
    'link': 'first',  # Retain the first URL
    'recordname': 'size'
}).rename(columns={'recordname': 'rating_count'}).reset_index()

# Filter albums with more than 15 critic ratings
more_than_15_ratings = average_scores[average_scores['rating_count'] > 15]

# Sort by average score
highest_rated_albums = more_than_15_ratings.sort_values(by=['score', 'userscore'], ascending=False)



In [52]:
import pandas as pd

# Load the data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Convert scores to numeric, coercing errors to handle any non-numeric values
df['score'] = pd.to_numeric(df['score'], errors='coerce')
df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')

# Group by 'recordname' and calculate the average critic and user scores
average_scores = df.groupby('recordname').agg({
    'recordname': 'size',
    'score': 'mean',
    'userscore': 'mean',
    'recordauthor': 'first',
    'link': 'first'
}).rename(columns={'recordname': 'rating_count'}).reset_index()

# Filter albums with more than 15 critic ratings
more_than_15_ratings = average_scores[average_scores['rating_count'] > 15]

# Sort by average score
highest_rated_albums = more_than_15_ratings.sort_values(by=['score', 'userscore'], ascending=False)

print(highest_rated_albums)


                    recordname  rating_count      score  userscore  \
8489                     SMiLE            29  92.896552        7.9   
3387    Fetch the Bolt Cutters            28  92.821429        8.1   
11087      To Pimp A Butterfly            44  92.636364        8.9   
3863                  Ghosteen            28  92.607143        8.6   
11562            Van Lear Rose            27  92.074074        8.4   
...                        ...           ...        ...        ...   
10835            The Weirdness            32  43.843750        5.7   
6156                      Lulu            31  42.258065        2.3   
11460  United Nations of Sound            16  40.437500        6.4   
5928                 Liz Phair            21  39.571429        6.1   
3299        Famous First Words            17  32.882353        2.5   

                    recordauthor  \
8489                Brian Wilson   
3387                 Fiona Apple   
11087             Kendrick Lamar   
3863   Nick Cav

In [50]:
import pandas as pd

# Load the data
path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
df = pd.read_csv(path, encoding='latin-1')

# Convert scores to numeric, coercing errors to handle any non-numeric values
df['score'] = pd.to_numeric(df['score'], errors='coerce')
df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')

# Concatenate artist name with record name
df['record'] = df['recordname'] + ' - ' + df['recordauthor']

# Group by 'record' and calculate the average critic and user scores
average_scores = df.groupby('record').agg({
    'score': 'mean',
    'userscore': 'mean',
    'record': 'size',
    'link': 'first'
}).rename(columns={'record': 'rating_count'}).reset_index()

# Filter albums with more than 15 critic ratings
more_than_15_ratings = average_scores[average_scores['rating_count'] > 15]

# Sort by average score
highest_rated_albums = more_than_15_ratings.sort_values(by=['score', 'userscore'], ascending=False)

print(highest_rated_albums)


                                           record      score  userscore  \
8791                         SMiLE - Brian Wilson  92.896552        7.9   
3486         Fetch the Bolt Cutters - Fiona Apple  92.821429        8.1   
11456        To Pimp A Butterfly - Kendrick Lamar  92.636364        8.9   
3991         Ghosteen - Nick Cave & the Bad Seeds  92.607143        8.6   
11957                Van Lear Rose - Loretta Lynn  92.074074        8.4   
...                                           ...        ...        ...   
6381                             Lulu - Metallica  42.258065        2.3   
11846  United Nations of Sound - Richard Ashcroft  40.437500        6.4   
6144                        Liz Phair - Liz Phair  39.571429        6.1   
8458                          Rebirth - Lil Wayne  36.523810        3.3   
3395            Famous First Words - Viva Brother  32.882353        2.5   

       rating_count                                               link  
8791             29  http:

In [53]:
# Define the path for the output CSV file
output_path = r"highest_rated_albums.csv"

# Save the DataFrame to a CSV file
highest_rated_albums.to_csv(output_path, index=False)

print("CSV file saved successfully.")


CSV file saved successfully.


In [24]:
import pandas as pd
from datetime import datetime

path = r"C:\Users\texta\Downloads\mc_critic_reviews.csv"
# pd.read_csv(path, encoding='utf-8')
df = pd.read_csv(path, encoding='latin-1')

# Filter by the number of critics
def filter_by_critics_count(df, min_critics=None, max_critics=None):
    critics_count = df.iloc[:, 2:12].notna().sum(axis=1)  # Count non-null values in the critic score columns
    mask = (critics_count >= min_critics) if min_critics else pd.Series([True] * len(df))
    mask &= (critics_count <= max_critics) if max_critics else pd.Series([True] * len(df))
    return df[mask]

# Filter by Metascore range
def filter_by_metascore_range(df, min_score=None, max_score=None):
    mask = (df['metascore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['metascore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Filter by date range
def filter_by_date_range(df, start_date=None, end_date=None):
    df['date'] = pd.to_datetime(df['date'])
    mask = (df['date'] >= pd.to_datetime(start_date, format='%Y-%m-%d')) if start_date else pd.Series([True] * len(df))
    mask &= (df['date'] <= pd.to_datetime(end_date, format='%Y-%m-%d')) if end_date else pd.Series([True] * len(df))
    return df[mask]

def filter_by_userscore_range(df, min_score=None, max_score=None):
    # Convert the 'userscore' column to numeric
    df['userscore'] = pd.to_numeric(df['userscore'], errors='coerce')
    
    mask = (df['userscore'] >= min_score) if min_score else pd.Series([True] * len(df))
    mask &= (df['userscore'] <= max_score) if max_score else pd.Series([True] * len(df))
    return df[mask]

# Apply a custom filter function
def apply_filter(df, filter_func):
    return df[df.apply(filter_func, axis=1)]

# Example usage
# ... (load your data into a list of dictionaries called 'data')

# df = pd.DataFrame(data)

# Filter by at least 5 critics and Metascore between 70 and 90
filtered_df = filter_by_critics_count(df, min_critics=5)
filtered_df = filter_by_metascore_range(filtered_df, min_score=70, max_score=90)

# Filter by release date in 2005
# filtered_df = filter_by_date_range(filtered_df, start_date='2005-01-01', end_date='2005-12-31')

# Filter by user score greater than 8.0
# filtered_df = filter_by_userscore_range(filtered_df, min_score=8.0)

# Apply a custom filter function
def has_critic_review(row):
    return any(isinstance(score, (int, float)) for score in row[2:12])

# filtered_df = apply_filter(filtered_df, has_critic_review)

In [25]:
filtered_df

Unnamed: 0.1,Unnamed: 0,recordname,recordauthor,critic,score,brief,link,metascore,description,userscore,date
0,1,'64 - '95,Lemon Jelly,PopMatters,90,"It is a fantastic album, no less so than the o...",http://popmatters.com/music/reviews/l/lemonjel...,72,The English duo's third album is composed enti...,8.3,2005-01-25
1,2,'64 - '95,Lemon Jelly,Splendid,90,"Though sampling has been done to death, the st...",http://www.splendidezine.com/review.html?revie...,72,The English duo's third album is composed enti...,8.3,2005-01-25
2,3,'64 - '95,Lemon Jelly,Lost At Sea,90,Perhaps their best outing yet.,http://www.lostatsea.net/review.phtml?id=18856...,72,The English duo's third album is composed enti...,8.3,2005-01-25
3,4,'64 - '95,Lemon Jelly,AllMusic,90,It's breathtaking and essential listening for ...,http://www.allmusic.com/cg/amg.dll?p=amg&amp;s...,72,The English duo's third album is composed enti...,8.3,2005-01-25
4,5,'64 - '95,Lemon Jelly,Billboard,90,Being sampled by Lemon Jelly on this astonishi...,http://www.billboard.com/bb/reviews/album_arti...,72,The English duo's third album is composed enti...,8.3,2005-01-25
...,...,...,...,...,...,...,...,...,...,...,...
185634,185648,ZUU,Denzel Curry,Exclaim,80,Curry has come a long way since he blew up and...,https://exclaim.ca/music/article/denzel_curry-zuu,85,The fourth full-length studio release for the ...,8.0,2019-05-31
185635,185649,ZUU,Denzel Curry,Clash Music,80,ZUU is an experience that transports the liste...,https://www.clashmusic.com/reviews/denzel-curr...,85,The fourth full-length studio release for the ...,8.0,2019-05-31
185636,185650,ZUU,Denzel Curry,New Musical Express (NME),80,"Overall, he has created a musical representati...",https://www.nme.com/reviews/album/denzel-curry...,85,The fourth full-length studio release for the ...,8.0,2019-05-31
185637,185651,ZUU,Denzel Curry,Consequence,75,"ZUU is Currys ASTROWORLD, unmistakably transp...",https://consequenceofsound.net/2019/06/album-r...,85,The fourth full-length studio release for the ...,8.0,2019-05-31


In [14]:
import pandas as pd
from datetime import datetime, timedelta

# Sample DataFrame setup (this should be your actual DataFrame)
# df = pd.read_csv('your_file.csv')  # Use your actual file
# df should have columns 'brief', 'rating', 'num_critics'

# Example structure:
# df = pd.DataFrame({
#     'brief': ["Album A (2013)", "Album B (2014)", "Album C (2015)"],
#     'rating': [4.5, 4.8, 4.7],
#     'num_critics': [100, 150, 120]
# })

# Create a new column to store the release date
df['release_date'] = df['brief'].apply(lambda x: datetime.strptime(x.split('(')[1].split(')')[0], '%Y') if x != '' else datetime(2000, 1, 1))

# Filter to get only records where the release date is from 2012 and onwards
df = df[df['release_date'] >= datetime(2012, 1, 1)]

# Filter out records where the release date is after '2014-09-30'
df = df[df['release_date'] <= datetime(2014, 9, 30)]

# Sort the resulting dataframe based on release date
sorted_data = df.sort_values('release_date')

# Get the list of unique release dates
release_dates = sorted(list(set(sorted_data['release_date'])))

# For each release date, get the corresponding rows from the dataframe
date_ranges = []
for release_date in release_dates:
    date_range_start = release_date - timedelta(days=30)
    date_range_end = release_date + timedelta(days=30)
    date_ranges.append((date_range_start, date_range_end))

for date_range_start, date_range_end in date_ranges:
    mask = (sorted_data['release_date'] >= date_range_start) & (sorted_data['release_date'] <= date_range_end)
    next_data = sorted_data.loc[mask]
    print(f"Between {date_range_start.date()} and {date_range_end.date()} : {next_data.shape[0]}")
    next_data.to_csv(f"albums_{date_range_start.strftime('%m_%d_%Y')}.csv", index=False)

# Calculate a combined score for sorting: rating * number of critics
sorted_data['score'] = sorted_data['rating'] * sorted_data['num_critics']

# Sort by the combined score, and also consider the number of critics
important_albums = sorted_data.sort_values(by=['score', 'num_critics', 'rating'], ascending=[False, False, False])

# Print the most important albums
print("Top quality albums based on rating and number of critics:")
print(important_albums[['brief', 'rating', 'num_critics']])


IndexError: list index out of range