<a href="https://colab.research.google.com/github/pkaiser8/info-664-final/blob/main/PK_final_draft_nov_25_update.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Three (or less) Randomized Records from the Collection of the Cooper Hewitt, Smithsonian Design Museum

In [1]:
import pandas as pd
import random

# Establish file path for .CSV data sheet:
data_filepath = '/content/objects-refined.csv'

def load_data(data_filepath):
    """
    Loads data from a CSV file.

    Inputs:
        data_filepath: The file or URL path to the CSV file.

    Returns:
        The loaded DataFrame.
    """

    # Read the data from the .CSV defined in data_filepath.
    # Use low_memory=False to process entire file at once.
    objects_df = pd.read_csv(data_filepath, low_memory=False)
    return objects_df

def get_user_input(objects_df):
  """Pulls in user input to establish key selection"""

  # Allow the user to define the desired output by selecting from the key columns outlined in the .CSV file used:
  print(f'Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. \nPlease see below a list of keys used to define objects in the collection .CSV file:\n')
  # Prints the .CSV columns so the user can decide which to input:
  print(objects_df.columns)
  print()
  print(f'Select which keys you would like to pair together to randomly find three or less records which share \ncommon values from these elements.\nNote: Some key combinations work better than others. See which ones yield the best results\n')

  # User inputted information for each desired key:
  user_selected_key_1 = input(f'Please enter one of the keys listed above. Remember to include everything between the quotes:\n')
  print()
  user_selected_key_2 = input(f'Please enter a second key:\n')
  print()
  print(f'You have selected "{user_selected_key_1}" and "{user_selected_key_2}" as your grouped keys.')
  # Create a group of with these two user inputted key selections:
  user_input = [user_selected_key_1, user_selected_key_2]
  return user_input

def group_data(data_filepath, user_input):
    """Loads data from a CSV file and groups it by a specified column.

    Inputs:
        data_filepath (string): The file or URL path to the CSV file.
        user_input (string): The key columns to group the data by.

    Returns:
        The grouped DataFrame.
    """

    # Use the groupby() method to group the rows in the DataFrame
    # based on specific values in the two user defined key columns:
    grouped_df = objects_df.groupby(user_input)
    return grouped_df

# Run the functions to get user input for key selection

# Load the data:
objects_df = load_data(data_filepath)

# Get user input:
user_input = get_user_input(objects_df)

# Apply the grouping:
grouped_df = group_data(data_filepath, user_input)

Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. 
Please see below a list of keys used to define objects in the collection .CSV file:

Index(['accession_number', 'creditline', 'date', 'decade', 'department_id',
       'description', 'dimensions', 'dimensions_raw', 'gallery_text', 'id',
       'inscribed', 'is_active', 'is_loan_object', 'justification',
       'label_text', 'markings', 'media_id', 'medium', 'on_display',
       'period_id', 'primary_image', 'primary_image2', 'provenance', 'signed',
       'title', 'title_raw', 'tms:id', 'tombstone', 'type', 'type_id', 'url',
       'videos', 'woe:country', 'woe:country_id', 'woe:country_name',
       'year_acquired', 'year_end', 'year_start'],
      dtype='object')

Select which keys you would like to pair together to randomly find three or less records which share 
common values from these elements.
Note: Some key combinations work better than others. See which ones yield the best results

Please e

In [22]:
# This is the ideal number of records the function below should aim to return:
num_records = 3

def select_random_group_and_records(grouped_df, num_records=3):
    """Selects a random group and a specified number of random records from that group.

    Inputs:
        grouped_df: The grouped DataFrame.
        num_records (int, optional): The number of records to select. Defaults to 3.

    Returns:
        tuple: A tuple containing the selected group key and the random records.
        selected_group_value (string): The key of the selected group.
        random_records (DataFrame): The randomly selected records.
        An error message is printed if no records are found for the selected group.
    """

    # Creates a list of all possible key values from the DataFrame (grouped_df) called group_keys:
    group_key_value = list(grouped_df.groups.keys())
    # The random method pulls a random choice of values from the group_keys variable above:
    selected_group_value = random.choice(group_key_value)

    try:
        # Using pd get_group() method, extracts whole records
        # from the randomized variable selected_group_value
        selected_group_records = grouped_df.get_group(selected_group_value)
    except KeyError or UnboundLocalError:
        print(f"Could not find records since one group value is blank, please run this cell again.\nYou may also reselect your two keys in the cell above and rerun both cells.\n")
        # Returns an empty DataFrame if a KeyError occurs,
        # and asks the user to try again or change the parameters in the cell before
        return selected_group_value, pd.DataFrame()

    # Checks if the number of records fall within selected_group_records
    # variable is equal to the desired number of records (num_records):
    if len(selected_group_records) >= num_records:
        # Selects at most three records from the selected_group:
        random_records = selected_group_records.sample(n=num_records)
    else:
        # If less than three records are found, it will still display what was found:
        random_records = selected_group_records
    # Returns the selected grouped valued based on user inputted keys and the
    # random records that share those randomly selected values in those keys:
    return selected_group_value, random_records

# Call the functions directly to execute the code
grouped_df = group_data(data_filepath, user_input)
selected_group_value, random_records = select_random_group_and_records(grouped_df, num_records)

print("Randomly Selected Group Values:\n")
print(f'{user_input[0]} = {selected_group_value[0]}')
print(f'{user_input[1]} = {selected_group_value[1]}\n')
if not random_records.empty:
  print("Randomly Selected Records:")
  print()
  print(random_records)

Randomly Selected Group Values:

type = Calculator
decade = 1970.0

Randomly Selected Records:

      accession_number                                  creditline  date  \
85806        1986-99-1  Gift of Barry Friedman and Patricia Pastor  1973   
62008         2008-9-5            Gift of Max Pine and Lois Mander  1973   
85840       1986-99-41  Gift of Barry Friedman and Patricia Pastor  1973   

       decade  department_id  \
85806  1970.0       35347497   
62008  1970.0       35347497   
85840  1970.0       35347497   

                                             description  \
85806  Rectangular grey adding machine. Sloping front...   
62008  This model had a total of twelve versions that...   
85840  Low, horizontal rectangular form with curved e...   

                                              dimensions  dimensions_raw  \
85806  H x W x D: 12.3 x 42.2 x 33.2 cm (4 13/16 x 16...             NaN   
62008  H x W x D: 5.1 x 11.2 x 2.8 cm (2 x 4 3/8 x 1 ...             NaN   
8

In [24]:
def generate_html_table(random_records):
  """Generates an HTML table to visually display"""

  all_records_data = []
  for column in random_records.values:
    record_data = {
        'Image': column[20],
        'Title': column[24],
        'Date': column[2],
        'Medium': column[17],
        'Dimensions': column[6],
        'Type': column[28],
        'Country': column[34],
        'Accession Number': column[0]
        }
    all_records_data.append(record_data)

  # Build an HTML table, starting with the CSS:
  html_table = """
  <table style='border-collapse: separate; border-spacing: 10px; border: 2px solid #ddd;'>
    <tr>
        <th style='border: 2px dotted #fff; padding: 8px;'>Image</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Title</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Date</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Medium</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Dimensions</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Type</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Country</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Accession Number</th>
    </tr>
  """

  # Generates an HTML table to visually display the data compiled in the "all_records_data" list made above
  # (1-3 records) stored in all_records_data list defined above:
  for record in all_records_data:
    html_table += "<tr>"
    # Check to see if the record contains a valid image link
    if 'Image' in record and pd.notna(record['Image']) != False:
      image_link = record['Image']
    else:
      # Use a placeholder image link to Cooper Hewitt logo if the 'Image' value is missing or empty:
      image_link = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg/320px-Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg.png'

    # Wrap the image in an <a> tag to create a link in the
    # image thumbnail to view the full size picture in a separate tab:
    html_table += f"<td style='border: 1px solid #ddd; padding: 8px;'><a href='{image_link}' target='_blank'><img src='{image_link}' width='100'></a></td>"
    for key, value in record.items():
        # Skip 'Image' as it's already handled:
        if key != 'Image':
            html_table += f"<td style='border: 1px solid #ddd; padding: 8px;'>{value}</td>"
    html_table += "</tr>"

  html_table += "</table>"
  return html_table

print("Three or less Cooper Hewitt collection objects selected from the database with\nthe following shared values under the user selected keys:\n")
print(f'{user_input[0]} = {selected_group_value[0]}')
print(f'{user_input[1]} = {selected_group_value[1]}')
print()

# Import the HTML from IPython.display
# Source: https://ipython.readthedocs.io/en/8.26.0/api/generated/IPython.display.html
from IPython.display import HTML
html_table = generate_html_table(random_records)
display(HTML(html_table))

Three or less Cooper Hewitt collection objects selected from the database with
the following shared values under the user selected keys:

type = Calculator
decade = 1970.0



Image,Title,Date,Medium,Dimensions,Type,Country,Accession Number
,"Logos 59 Calculator, 1973",1973,"Molded plastic, metal",H x W x D: 12.3 x 42.2 x 33.2 cm (4 13/16 x 16 5/8 x 13 1/16 in.),Calculator,Italy,1986-99-1
,"Cambridge (I-40) Calculator, 1973",1973,Plastic,H x W x D: 5.1 x 11.2 x 2.8 cm (2 x 4 3/8 x 1 1/8 in.),Calculator,United Kingdom,2008-9-5
,"Divisumma 18 Calculator, 1973",1973,"Abs plastic, melamine, rubber, metal",H x W x D: 4.6 x 30.9 x 12cm (1 13/16 x 12 3/16 x 4 3/4in.),Calculator,Italy,1986-99-41


## **OLD CODE CELLS BELOW**

In [33]:
# Create a dictionary entry for each individual record called and compile them into a list

all_records_data = []

for column in random_records.values:
    record_data = {
        'Image': column[20],
        'Title': column[24],
        'Date': column[2],
        'Medium': column[17],
        'Dimensions': column[6],
        'Type': column[28],
        'Country': column[34],
        'Accession Number': column[0]
    }
# Appends all 1-3 records to a singular dictionary called all_records_data
    all_records_data.append(record_data)

In [None]:
print(image_link)
print(record['Image'])
print(record.get('Image'))
pd.notna(record['Image'])

https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg
https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg
https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg


True

In [None]:
import pandas as pd

data_filepath = '/content/objects-refined.csv'

# Use the 'error_bad_lines=False' argument to skip problematic lines
objects_df = pd.read_csv(data_filepath, on_bad_lines='skip')
# Alternatively, you can use 'warn' instead of 'skip' to get warnings about bad lines

# You can also specify the delimiter explicitly if it's not a comma
# trees_df = pd.read_csv(data_filepath, sep=';', on_bad_lines='skip') # Example with semicolon delimiter

objects_df.head()

  objects_df = pd.read_csv(data_filepath, on_bad_lines='skip')


Unnamed: 0,accession_number,creditline,date,decade,department_id,description,dimensions,dimensions_raw,gallery_text,id,...,type,type_id,url,videos,woe:country,woe:country_id,woe:country_name,year_acquired,year_end,year_start
0,56.2015.10,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,,,,135726747,...,Dirt sample,136300499.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
1,56.2015.4,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 21.6 × 45.7 cm (8 1/2 in. × 18 in.),,,135726733,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
2,56.2015.7,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 27.9 × 21.6 cm (11 in. × 8 1/2 in.),,The natural environment plays a large role in ...,135726743,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
3,56.2015.12,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,,,,135726751,...,Dirt sample,136300499.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
4,56.2015.2,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 23.5 × 47.6 cm (9 1/4 × 18 3/4 in.),,While on a research road trip for Cars along R...,135726729,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,


In [None]:
for column in random_records.values:
  print(f'Image: {column[20]}')
  print(f'Title: {column[24]}')
  print(f'Date: {column[2]}')
  print(f'Medium: {column[17]}')
  print(f'Dimensions: {column[6]}')
  print(f'Type: {column[28]}')
  print(f'Accesssion Number: {column[0]}')
  print() #add an empty print statement to create a break

Image: https://images.collection.cooperhewitt.org/43728_37936fe0b37f1c87_z.jpg
Title: Sidewall (Canada), 1950–56
Date: 1950–56
Medium: Machine-printed
Dimensions: 56.5 cm (22 1/4 in.)
Type: Sidewall
Accesssion Number: 1991-89-209

Image: https://images.collection.cooperhewitt.org/82179_2ceaed803663446c_z.jpg
Title: Sidewall (Canada), 1950–58
Date: 1950–58
Medium: Machine-printed, textured ground
Dimensions: 1062 x 55.5 cm (418 1/8 x 21 7/8 in. )
Type: Sidewall
Accesssion Number: 1991-89-214

Image: https://images.collection.cooperhewitt.org/120717_467d7efb42d2b9a1_z.jpg
Title: Sidewall (Canada), 1954
Date: 1954
Medium: Machine-printed
Dimensions: 55.5 cm (21 7/8 in.)
Type: Sidewall
Accesssion Number: 1991-89-202



In [None]:
import pandas as pd
import random

# Load the data
data_filepath = '/content/objects-refined.csv'

# Group by 'Type' and other elements later on...
grouped_df = object_df.groupby(['date'])

# Get list of group keys
group_keys = list(grouped_df.groups.keys())

# Select a random group
selected_group_key = random.choice(group_keys)
selected_group_records = grouped_df.get_group(selected_group_key)

# Select three random records if available
if len(selected_group_records) >= 3:
    random_records = selected_group_records.sample(n=3)
else:
    random_records = selected_group_records  # Select all if fewer than 3

# Output results
print("Selected Group Key:", selected_group_key)
print("Randomly Selected Records:")
print(random_records)

NameError: name 'object_df' is not defined

In [None]:
output_string = ""

for column in random_records.values:
    output_string += f"""
Image: {column[20]}
Title: {column[24]}
Date: {column[2]}
Medium: {column[17]}
Dimensions: {column[6]}
Type: {column[28]}
Accession Number: {column[0]}

"""  # Add an extra newline for spacing between records

print(output_string)