<a href="https://colab.research.google.com/github/pkaiser8/info-664-final/blob/main/PK_final_draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Three (or less) Randomized Records from the Collection of the Cooper Hewitt, Smithsonian Design Museum

Read me:

In [57]:
import pandas as pd
import random

#print(f'Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. \nPlease see below a list of elements used to define objects in the collection:\n')
#print(objects_df.columns)
#print()
#print(f'Select which elements you would like to pair together to randomly find records which share \ncommon values from these elements.\n')

#user_selected_element_1 = input(f'Please enter one of the elements listed above. Remember to include everything between the quotes.\n')
#print()
#user_selected_element_2 = input(f'Please enter a second element.\n')
#print()
#print(f'You have selected {user_selected_element_1} and {user_selected_element_2} as your elements.')

# Defining variables for the functions:
data_filepath = '/content/objects-refined.csv'
#group_column = (user_selected_element_1, user_selected_element_2) # Crete a group with these two columns
group_column = ['woe:country', 'date']
num_records = 3 # This is the ideal number of records returned, it will however, return 1-2 if that's all it can find

def load_and_group_data(data_filepath, group_column):
    """Loads data from a CSV file and groups it by a specified column.

    Arguments:
        data_filepath (string): The path to the CSV file.
        group_column (string): The column to group the data by.

    Returns:
        The grouped DataFrame.
    """
    objects_df = pd.read_csv(data_filepath, low_memory=False) # Use low_memory=False to process entire file at once
    grouped_df = objects_df.groupby(group_column) # Use the groupby() method to group the rows in the DataFrame based on specific values in one or more columns
    return grouped_df

In [58]:
def select_random_group_and_records(grouped_df, num_records=3):
    """Selects a random group and a specified number of random records from that group.

    Args:
        grouped_df: The grouped DataFrame.
        num_records (int, optional): The number of records to select. Defaults to 3.

    Returns:
        tuple: A tuple containing the selected group key and the random records.
        selected_group_key (string): The key of the selected group.
        random_records (DataFrame): The randomly selected records.
        An error message is printed if no records are found for the selected group.
    """
    group_keys = list(grouped_df.groups.keys()) # Creates a list of unique group names from the DataFrame (grouped_df) called group_keys
    selected_group_key = random.choice(group_keys) # The random method pulls a random choice of  from the group_keys value

    try:
        selected_group_records = grouped_df.get_group(selected_group_key)
    except KeyError or UnboundLocalError:
        print(f"Could not find records since one group is blank, please run the cell again.")
        return selected_group_key, pd.DataFrame() # Return empty DataFrame if KeyError occurs

    if len(selected_group_records) >= num_records: # This calculates the number of records within selected_group_records
        random_records = selected_group_records.sample(n=num_records)
    else:
        random_records = selected_group_records

    return selected_group_key, random_records

# Call the functions directly to execute the code
grouped_df = load_and_group_data(data_filepath, group_column)
selected_group_key, random_records = select_random_group_and_records(grouped_df, num_records)

print("Selected Group Key:", selected_group_key)
if not random_records.empty:
  print("Randomly Selected Records:")
  print()
  print(random_records)

Selected Group Key: (23424975.0, 'Ca. 1936')
Randomly Selected Records:

       accession_number                        creditline      date  decade  \
75057        1992-130-1             Gift of Deane Granoff  Ca. 1936  1920.0   
154275      1963-39-101  Gift of Mrs. E. McKnight Kauffer  Ca. 1936  1930.0   

        department_id                                        description  \
75057        35347497  Necklace has charms in the form of hearts, one...   
154275       35347493  The world in black and white, surrounded by te...   

                                               dimensions  dimensions_raw  \
75057   Overall: 24.4 x 13 x 1.2 cm (9 5/8 x 5 1/8 x 1...             NaN   
154275               102 x 64.1 cm (40 3/16 x 25 1/4 in.)             NaN   

       gallery_text        id  ...      type     type_id  \
75057           NaN  18638861  ...  Necklace  35237147.0   
154275          NaN  18446727  ...    Poster  35238163.0   

                                               

In [59]:
# Create a dictionary entry for each individual record called and compile them into a list

all_records_data = []

for column in random_records.values:
    record_data = {
        'Image': column[20],
        'Title': column[24],
        'Date': column[2],
        'Medium': column[17],
        'Dimensions': column[6],
        'Type': column[28],
        'Country': column[34],
        'Accession Number': column[0]
    }
# Appends all 1-3 records to a singular dictionary called all_records_data
    all_records_data.append(record_data)

In [60]:
# CSS styling for an HTML table to display all_records_data dictionaries defined above
html_table = """

<table style='border-collapse: separate; border-spacing: 10px; border: 2px solid #ddd;'>
    <tr>
        <th style='border: 2px dotted #fff; padding: 8px;'>Image</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Title</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Date</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Medium</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Dimensions</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Type</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Country</th>
        <th style='border: 2px dotted #fff; padding: 8px;'>Accession Number</th>
    </tr>"""

# Generates an HTML table to visually display the data
# (1-3 records) stored in all_records_data list defined above

for record in all_records_data:
    html_table += "<tr>"

    # Check to see if the record contains a valid image link
    if 'Image' in record and pd.notna(record['Image']) != False:
      image_link = record['Image']
    else:
      # Use a placeholder image link to Cooper Hewitt logo if 'Image' is missing or empty
      image_link = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg/320px-Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg.png'

    # Wrap the image in an <a> tag to create a link in the
    # image thumbnail to view the full size picture in a separate tab
    html_table += f"<td style='border: 1px solid #ddd; padding: 8px;'><a href='{image_link}' target='_blank'><img src='{image_link}' width='100'></a></td>"
    for key, value in record.items():
        if key != 'Image':  # Skip 'Image' as it's already handled
            html_table += f"<td style='border: 1px solid #ddd; padding: 8px;'>{value}</td>"
    html_table += "</tr>"

# Build the HTML table to display the data, print the groups pulled from selected_group_key variable defined above
print("Cooper Hewitt collection objects selected from the database with the following shared elements:")
print(f'{group_column[0]} = {selected_group_key[0]}')
print(f'{group_column[1]} = {selected_group_key[1]}')
print()

html_table += "</table>"

# Import the HTML from IPython.display
# Source: https://ipython.readthedocs.io/en/8.26.0/api/generated/IPython.display.html
from IPython.display import HTML
display(HTML(html_table))

Cooper Hewitt collection objects selected from the database with the following shared elements:
woe:country = 23424975.0
date = Ca. 1936



Image,Title,Date,Medium,Dimensions,Type,Country,Accession Number
,"Necklace (England), ca. 1936",Ca. 1936,"Bakelite, metal",Overall: 24.4 x 13 x 1.2 cm (9 5/8 x 5 1/8 x 1/2 in.),Necklace,United Kingdom,1992-130-1
,"Poster, Travel in Comfort, Imperial Airways, ca. 1936",Ca. 1936,Offset lithograph on paper mounted on board,102 x 64.1 cm (40 3/16 x 25 1/4 in.),Poster,United Kingdom,1963-39-101


## **OLD CODE CELLS BELOW**

In [None]:
print(image_link)
print(record['Image'])
print(record.get('Image'))
pd.notna(record['Image'])

https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg
https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg
https://images.collection.cooperhewitt.org/219908_0f686168a388b1d6_z.jpg


True

In [None]:
import pandas as pd

data_filepath = '/content/objects-refined.csv'

# Use the 'error_bad_lines=False' argument to skip problematic lines
objects_df = pd.read_csv(data_filepath, on_bad_lines='skip')
# Alternatively, you can use 'warn' instead of 'skip' to get warnings about bad lines

# You can also specify the delimiter explicitly if it's not a comma
# trees_df = pd.read_csv(data_filepath, sep=';', on_bad_lines='skip') # Example with semicolon delimiter

objects_df.head()

  objects_df = pd.read_csv(data_filepath, on_bad_lines='skip')


Unnamed: 0,accession_number,creditline,date,decade,department_id,description,dimensions,dimensions_raw,gallery_text,id,...,type,type_id,url,videos,woe:country,woe:country_id,woe:country_name,year_acquired,year_end,year_start
0,56.2015.10,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,,,,135726747,...,Dirt sample,136300499.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
1,56.2015.4,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 21.6 × 45.7 cm (8 1/2 in. × 18 in.),,,135726733,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
2,56.2015.7,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 27.9 × 21.6 cm (11 in. × 8 1/2 in.),,The natural environment plays a large role in ...,135726743,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
3,56.2015.12,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,,,,135726751,...,Dirt sample,136300499.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,
4,56.2015.2,Courtesy of Pixar Animation Studios,2006,2000.0,404529577,,H x W: 23.5 × 47.6 cm (9 1/4 × 18 3/4 in.),,While on a research road trip for Cars along R...,135726729,...,Concept art,136251375.0,https://collection.cooperhewitt.org/objects/13...,,,,,,,


In [None]:
for column in random_records.values:
  print(f'Image: {column[20]}')
  print(f'Title: {column[24]}')
  print(f'Date: {column[2]}')
  print(f'Medium: {column[17]}')
  print(f'Dimensions: {column[6]}')
  print(f'Type: {column[28]}')
  print(f'Accesssion Number: {column[0]}')
  print() #add an empty print statement to create a break

Image: https://images.collection.cooperhewitt.org/43728_37936fe0b37f1c87_z.jpg
Title: Sidewall (Canada), 1950–56
Date: 1950–56
Medium: Machine-printed
Dimensions: 56.5 cm (22 1/4 in.)
Type: Sidewall
Accesssion Number: 1991-89-209

Image: https://images.collection.cooperhewitt.org/82179_2ceaed803663446c_z.jpg
Title: Sidewall (Canada), 1950–58
Date: 1950–58
Medium: Machine-printed, textured ground
Dimensions: 1062 x 55.5 cm (418 1/8 x 21 7/8 in. )
Type: Sidewall
Accesssion Number: 1991-89-214

Image: https://images.collection.cooperhewitt.org/120717_467d7efb42d2b9a1_z.jpg
Title: Sidewall (Canada), 1954
Date: 1954
Medium: Machine-printed
Dimensions: 55.5 cm (21 7/8 in.)
Type: Sidewall
Accesssion Number: 1991-89-202



In [None]:
import pandas as pd
import random

# Load the data
data_filepath = '/content/objects-refined.csv'

# Group by 'Type' and other elements later on...
grouped_df = object_df.groupby(['date'])

# Get list of group keys
group_keys = list(grouped_df.groups.keys())

# Select a random group
selected_group_key = random.choice(group_keys)
selected_group_records = grouped_df.get_group(selected_group_key)

# Select three random records if available
if len(selected_group_records) >= 3:
    random_records = selected_group_records.sample(n=3)
else:
    random_records = selected_group_records  # Select all if fewer than 3

# Output results
print("Selected Group Key:", selected_group_key)
print("Randomly Selected Records:")
print(random_records)

NameError: name 'object_df' is not defined

In [None]:
output_string = ""

for column in random_records.values:
    output_string += f"""
Image: {column[20]}
Title: {column[24]}
Date: {column[2]}
Medium: {column[17]}
Dimensions: {column[6]}
Type: {column[28]}
Accession Number: {column[0]}

"""  # Add an extra newline for spacing between records

print(output_string)