In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# define the possible values for genre
genres = ["action", "comedy", "drama", "horror", "romance"]

# generate 1000 random ids
ids = np.arange(1, 1001)
# generate 1000 random filenames
filenames = np.array([f"file_{id}.txt" for id in ids])
# generate a 1000 x 1024 array of random numbers between 0 and 255
x = np.random.randint(0, 256, size=(1000, 1024))
# choose 1000 random genres from the list
genres = np.random.choice(genres, size=1000)

# create a dataframe from the arrays
df = pd.DataFrame(np.column_stack([ids, filenames, x, genres]))
# rename the columns
df.columns = ["id", "filename"] + [f"x{i}" for i in range(1, 1025)] + ["genre"]

# write the dataframe to a csv file
df.to_csv("records.csv", index=False)

In [3]:
# Reload the records from the same relative path they were written to
# ("records.csv" in the current working directory) instead of a hard-coded
# absolute Kaggle path, so the read works wherever the notebook is run.
df = pd.read_csv("records.csv")

In [4]:
# Preview the first rows of the records loaded from the CSV file.
df.head()

Unnamed: 0,id,filename,x1,x2,x3,x4,x5,x6,x7,x8,...,x1016,x1017,x1018,x1019,x1020,x1021,x1022,x1023,x1024,genre
0,1,file_1.txt,35,214,141,240,205,149,253,131,...,31,202,221,234,72,107,128,199,81,romance
1,2,file_2.txt,185,41,125,231,18,20,157,182,...,84,210,69,213,229,211,78,102,213,action
2,3,file_3.txt,5,234,163,6,215,175,158,132,...,248,188,226,62,62,84,30,88,12,action
3,4,file_4.txt,228,37,163,156,158,69,111,11,...,43,62,252,144,53,20,157,31,23,comedy
4,5,file_5.txt,197,185,167,233,47,115,181,118,...,85,12,241,17,82,77,113,249,206,horror


In [5]:
from torchvision import models
import torch.nn as nn
import torch

In [6]:
# Load ResNet-50 with pretrained weights (the checkpoint is downloaded to the
# torch hub cache the first time this runs).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing.
model = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 170MB/s]


In [7]:
# Swap the network's final fully connected layer for a fresh classification head.
num_classes = 5  # one output per class; assumes the dataset has 5 classes
num_features = model.fc.in_features  # input width of the existing final layer
model.fc = nn.Linear(in_features=num_features, out_features=num_classes)

In [8]:
# Give the network a first convolution that takes 1024 input channels
# (presumably one per x1..x1024 feature column — confirm the intended input layout).
model.conv1 = nn.Conv2d(
    in_channels=1024,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

In [9]:
# Display the model's layer structure, including the replaced conv1 and fc layers.
model

ResNet(
  (conv1): Conv2d(1024, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(

In [10]:
# Keep only the feature columns: everything except the id, filename and genre label.
non_feature_names = ("id", "filename", "genre")
feature_names = [name for name in df.columns if name not in non_feature_names]
arr = df.loc[:, feature_names].values

In [11]:
# Convert the feature matrix to a float32 tensor. Without an explicit dtype,
# torch.tensor infers it from `arr` (integer values, per the data preview),
# and an integer tensor cannot be fed through the model's float32
# convolution and linear weights.
tensor = torch.tensor(arr, dtype=torch.float32)

In [12]:
# Freeze every layer: after this call no parameter of the model tracks gradients.
# The trailing semicolon keeps the returned module from being echoed as cell output.
model.requires_grad_(False);

In [13]:
# Re-enable gradients for the layers that were replaced with freshly
# initialised ones: the final fully connected layer and the first convolution.
# Leaving conv1 frozen would pin it at its random initial weights, so the
# pretrained backbone would only ever see a fixed random projection of the input.
for trainable_layer in (model.conv1, model.fc):
    for param in trainable_layer.parameters():
        param.requires_grad = True