In [2]:
%%writefile Imagelib.h
#pragma once
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <iostream>
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>
#include <iomanip>
using byte = unsigned char;
#define LOG_OUT(_x_)
#define LOG_OUT_W(_x_)
#define LOG_OUT_A(_x_)
#pragma pack(push, 1)
typedef struct {
    unsigned short bfType;
    unsigned int   bfSize;
    unsigned short bfReserved1;
    unsigned short bfReserved2;
    unsigned int   bfOffBits;
} BITMAPFILEHEADER;

typedef struct {
    unsigned int   biSize;
    int            biWidth;
    int            biHeight;
    unsigned short biPlanes;
    unsigned short biBitCount;
    unsigned int   biCompression;
    unsigned int   biSizeImage;
    int            biXPelsPerMeter;
    int            biYPelsPerMeter;
    unsigned int   biClrUsed;
    unsigned int   biClrImportant;
} BITMAPINFOHEADER;
#pragma pack(pop)
static inline uint32_t row_stride_24(int w) {
    return (uint32_t)(((w * 3) + 3) & ~3u);
}
// ---- 선언 ----
bool LoadBmp(const char* filename, byte** pImage, int& height, int& width);
bool SaveBmp(const char* filename, byte* pImage, int height, int width);
bool convert1Dto2D(byte* src, double** dst_Y, double** dst_U, double** dst_V, int height, int width);
bool convert2Dto1D(double** src_Y, double** src_U, double** src_V, byte* dst, int height, int width);
void convert2Dto3D(double **src2D, double ***dst3D, int height, int width);
void convert3Dto2D(double ***src3D, double **dst2D, int height, int width);
double *dmatrix1D(int nH);
double **dmatrix2D(int nH, int nW);
double ***dmatrix3D(int nH, int nW, int nC);
double ****dmatrix4D(int nH, int nW, int nC, int nNum);
void free_dmatrix1D(double *Image, int nH);
void free_dmatrix2D(double **Image, int nH, int nW);
void free_dmatrix3D(double ***Image, int nH, int nW, int nC);
void free_dmatrix4D(double ****Image, int nH, int nW, int nC, int nNum);
double clip(double x, double minVal, double maxVal);
double** simpleUpsampling2x(double **Image, int nH, int nW);


// ---- 정의 ----
bool LoadBmp(const char* filename, byte** pImage, int& height, int& width) {
    *pImage = nullptr;
    std::FILE* fp = std::fopen(filename, "rb");
    if (!fp) { LOG_OUT_A("fopen() error"); return false; }
    BITMAPFILEHEADER bmf{};
    BITMAPINFOHEADER bmi{};
    if (std::fread(&bmf, sizeof(bmf), 1, fp) != 1) { std::fclose(fp); return false; }
    if (bmf.bfType != 0x4D42) { std::fclose(fp); LOG_OUT_A("not .bmp file"); return false; }
    if (std::fread(&bmi, sizeof(bmi), 1, fp) != 1) { std::fclose(fp); return false; }
    if (bmi.biBitCount != 24 || bmi.biCompression != 0 /*BI_RGB*/) {
        std::fclose(fp); LOG_OUT_A("only 24-bit BI_RGB supported"); return false;
    }
    width  = bmi.biWidth;
    height = (bmi.biHeight >= 0) ? bmi.biHeight : -bmi.biHeight;
    const bool bottom_up = (bmi.biHeight > 0);
    const uint32_t stride = row_stride_24(width);
    const uint32_t data_bytes = stride * (uint32_t)height;
    // 픽셀 데이터 위치로 이동
    if (bmf.bfOffBits > sizeof(bmf) + sizeof(bmi)) {
        std::fseek(fp, (long)bmf.bfOffBits, SEEK_SET);
    }
    // 원본(패딩 포함) 읽기
    std::vector<unsigned char> buf(data_bytes);
    if (std::fread(buf.data(), 1, data_bytes, fp) != data_bytes) { std::fclose(fp); return false; }
    std::fclose(fp);
    // 호출자용 포맷: 패딩 없음, top→bottom, BGR 연속 메모리
    *pImage = (byte*)std::malloc((size_t)width * height * 3);
    if (!*pImage) return false;
    for (int y = 0; y < height; ++y) {
        int src_y = bottom_up ? (height - 1 - y) : y;
        const unsigned char* src = buf.data() + (size_t)src_y * stride;
        byte* dst = *pImage + (size_t)y * width * 3;
        std::memcpy(dst, src, (size_t)width * 3);
    }
    return true;
}
bool SaveBmp(const char* filename, byte* pImage, int height, int width) {
    // pImage: top→bottom, 패딩 없음, BGR 연속
    const uint32_t stride = row_stride_24(width);
    const uint32_t data_bytes = stride * (uint32_t)height;
    BITMAPFILEHEADER bmf{};
    BITMAPINFOHEADER bmi{};
    bmi.biSize = sizeof(BITMAPINFOHEADER);
    bmi.biWidth = width;
    bmi.biHeight = height;        // bottom-up 저장(양수)
    bmi.biPlanes = 1;
    bmi.biBitCount = 24;
    bmi.biCompression = 0;        // BI_RGB
    bmi.biSizeImage = data_bytes;
    bmf.bfType = 0x4D42; // 'BM'
    bmf.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
    bmf.bfSize = bmf.bfOffBits + data_bytes;
    std::FILE* fp = std::fopen(filename, "wb");
    if (!fp) { LOG_OUT_A("fopen() error"); return false; }
    std::fwrite(&bmf, 1, sizeof(bmf), fp);
    std::fwrite(&bmi, 1, sizeof(bmi), fp);
    std::vector<unsigned char> row(stride, 0);
    for (int y = height - 1; y >= 0; --y) {
        const byte* src = pImage + (size_t)y * width * 3;
        std::memcpy(row.data(), src, (size_t)width * 3);
        std::fwrite(row.data(), 1, stride, fp);
    }
    std::fclose(fp);
    return true;
}
bool convert1Dto2D(byte* src, double** dst_Y, double** dst_U, double** dst_V, int height, int width) {
    int iR, iG, iB;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            iB = src[3 * width * y + 3 * x + 0];
            iG = src[3 * width * y + 3 * x + 1];
            iR = src[3 * width * y + 3 * x + 2];
            dst_Y[y][x] = iR * 0.299 + iG * 0.587 + iB * 0.114;
            dst_U[y][x] = (iB - dst_Y[y][x]) * 0.565;
            dst_V[y][x] = (iR - dst_Y[y][x]) * 0.713;
            dst_Y[y][x] = dst_Y[y][x] / 255.0; // [0,255] → [0,1]
        }
    }
    return true;
}
bool convert2Dto1D(double** src_Y, double** src_U, double** src_V, byte* dst, int height, int width) {
    int iCount = 0;
    int iR, iG, iB;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            double Y = src_Y[y][x] * 255.0;
            iR = (int)clip(Y + 1.403 * src_V[y][x], 0, 255);
            iG = (int)clip(Y - 0.344 * src_U[y][x] - 0.714 * src_V[y][x], 0, 255);
            iB = (int)clip(Y + 1.770 * src_U[y][x], 0, 255);
            dst[iCount + 0] = (byte)iB;
            dst[iCount + 1] = (byte)iG;
            dst[iCount + 2] = (byte)iR;
            iCount += 3;
        }
    }
    return true;
}
double clip(double x, double minVal, double maxVal) {
    if (x < minVal) x = minVal;
    if (x > maxVal) x = maxVal;
    return x;
}
double** simpleUpsampling2x(double **Image, int nH, int nW) {
    double** outImg = dmatrix2D(nH * 2, nW * 2);
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) {
            outImg[2 * y + 0][2 * x + 0] = Image[y][x];
            outImg[2 * y + 0][2 * x + 1] = Image[y][x];
            outImg[2 * y + 1][2 * x + 0] = Image[y][x];
            outImg[2 * y + 1][2 * x + 1] = Image[y][x];
        }
    }
    return outImg;
}
double *dmatrix1D(int nH) {
    return new double[nH]();
}
double **dmatrix2D(int nH, int nW) {
    double **Temp = new double*[nH];
    for (int y = 0; y < nH; y++) Temp[y] = new double[nW]();
    return Temp;
}
double ***dmatrix3D(int nH, int nW, int nC) {
    double ***Temp = new double**[nH];
    for (int y = 0; y < nH; y++) {
        Temp[y] = new double*[nW];
        for (int x = 0; x < nW; x++) Temp[y][x] = new double[nC]();
    }
    return Temp;
}
double ****dmatrix4D(int nH, int nW, int nC, int nNum) {
    double ****Temp = new double***[nH];
    for (int y = 0; y < nH; y++) {
        Temp[y] = new double**[nW];
        for (int x = 0; x < nW; x++) {
            Temp[y][x] = new double*[nC];
            for (int c = 0; c < nC; c++) Temp[y][x][c] = new double[nNum]();
        }
    }
    return Temp;
}
void free_dmatrix1D(double *Image, int) { delete[] Image; }
void free_dmatrix2D(double **Image, int nH, int) {
    for (int y = 0; y < nH; y++) delete[] Image[y];
    delete[] Image;
}
void free_dmatrix3D(double ***Image, int nH, int nW, int) {
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) delete[] Image[y][x];
        delete[] Image[y];
    }
    delete[] Image;
}
void free_dmatrix4D(double ****Image, int nH, int nW, int nC, int) {
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) {
            for (int c = 0; c < nC; c++) delete[] Image[y][x][c];
            delete[] Image[y][x];
        }
        delete[] Image[y];
    }
    delete[] Image;
}
void convert2Dto3D(double **src2D, double ***dst3D, int height, int width) {
    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++)
            dst3D[y][x][0] = src2D[y][x];
}
void convert3Dto2D(double ***src3D, double **dst2D, int height, int width) {
    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++)
            dst2D[y][x] = src3D[y][x][0];
}

Overwriting Imagelib.h


In [3]:
%%writefile CTensor.h

#pragma once
#include "Imagelib.h"
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <string>
using std::cout;
using std::endl;
using std::string;

// Tensor3D는 크기가 (nH x nW x nC)인 3차원 tensor를 관리함

class Tensor3D {
private:
	double*** tensor;
	int nH; // height
	int nW; // width
	int nC; // channel
public:
	Tensor3D(int _nH, int _nW, int _nC) : nH(_nH), nW(_nW), nC(_nC) {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작:
		//
		// 사용함수: dmatrix3D(): 3차원 행렬을 동적 할당해서 pointer를 반환하는 함수
		tensor = dmatrix3D(nH, nW, nC);
	}
	~Tensor3D() {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 3차원 동적 배열인 tensor를 할당 해제
		// 사용함수: free_dmatrix3D(): 3차원 동적 할당된 행렬을 할당 해제하는 함수
		free_dmatrix3D(tensor, nH, nW, nC);
	}
	void set_elem(int _h, int _w, int _c, double _val) { tensor[_h][_w][_c] = _val; }
	double get_elem(int _h, int _w, int _c)	const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행=_h, 열= _w, 채널= _c 위치 element를 반환할 것
		return tensor[_h][_w][_c];
	}

	void get_info(int& _nH, int& _nW, int& _nC) const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행렬의 차원(nH, nW, nC)을 pass by reference로 반환
		_nH = nH;
		_nW = nW;
		_nC = nC;
	}

	void set_tensor(double*** _tensor) { tensor = _tensor; }
	double*** get_tensor() const { return tensor; }

	void print() const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행렬의 크기 (nH*nW*nC)를 화면에 출력
		printf("Tensor size: %d x %d x %d\n", nH, nW, nC);
	}
};

Writing CTensor.h


In [4]:
%%writefile CLayer.h
#pragma once
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <string>
#include <omp.h>
#include "Imagelib.h"
#include "CTensor.h"
#define MEAN_INIT 0
#define LOAD_INIT 1
using std::cout;
using std::endl;
using std::string;

// Layer는 tensor를 입/출력으로 가지며, 특정 operation을 수행하는 Convolutional Neural Netowork의 기본 연산 단위


class Layer {
protected:
	int fK; // kernel size in K*K kernel
	int fC_in; // number of channels
	int fC_out; //number of filters
	string name;
public:
	Layer(string _name, int _fK, int _fC_in, int _fC_out) : name(_name), fK(_fK), fC_in(_fC_in), fC_out(_fC_out) {}
	virtual ~Layer() {}; //가상소멸자 (참고: https://wonjayk.tistory.com/243)
	virtual Tensor3D* forward(const Tensor3D* input) = 0;
	//	virtual bool backward() = 0;
	virtual void print() const = 0;
	virtual void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const = 0;
};


class Layer_ReLU : public Layer {
public:
	Layer_ReLU(string _name, int _fK, int _fC_in, int _fC_out)
		: Layer(_name, _fK, _fC_in, _fC_out)
	{
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작1: Base class의 생성자를 호출하여 맴버 변수를 초기화 할 것(반드시 initialization list를 사용할 것)
	}
	~Layer_ReLU() {}
	Tensor3D* forward(const Tensor3D* input) override {
		// (구현할 것)
		// 동작1: input tensor에 대해 각 element x가 양수이면 그대로 전달, 음수이면 0으로 output tensor에 전달할것
		// 동작2: 이때, output tensor는 동적할당하여 주소값을 반환할 것
		// 함수1: Tensor3D의 맴버함수인 get_info(), get_elem(), set_elem()을 적절히 활용할 것
		int H, W, C;
		input->get_info(H, W, C);

		Tensor3D* output = new Tensor3D(H, W, C);
		for (int h = 0; h < H; h++) {
			for (int w = 0; w < W; w++) {
				for (int c = 0; c < C; c++) {
					double val = input->get_elem(h, w, c);
					output->set_elem(h, w, c, (val > 0) ? val : 0.0);
				}
			}
		}
		cout << name << " is finished" << endl;
		return output;
	};
	void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: Tensor3D의 get_info()와 마찬가지로 맴버 변수들을 pass by reference로 외부에 전달
		_name = name;
		_fK = fK;
		_fC_in = fC_in;
		_fC_out = fC_out;
	}
	void print() const override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: Tensor3D의 print()와 마찬가지로 차원의 크기를 화면에 출력
		cout << "Layer: " << name << " (ReLU) "
			<< "Kernel=" << fK
			<< " Cin=" << fC_in
			<< " Cout=" << fC_out << endl;
	}
};



class Layer_Conv : public Layer {
private:
	string filename_weight;
	string filename_bias;
	double**** weight_tensor; // fK x fK x _fC_in x _fC_out 크기를 가지는 4차원 배열
	double*  bias_tensor;     // _fC_out 크기를 가지는 1차원 배열 (bias는 각 filter당 1개 존재)
public:
	Layer_Conv(string _name, int _fK, int _fC_in, int _fC_out, int init_type, string _filename_weight = "", string _filename_bias = "")
	:Layer(_name, _fK, _fC_in, _fC_out),
          filename_weight(_filename_weight), filename_bias(_filename_bias)
	{
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작1: initialization list와 base class의 생성자를 이용하여 맴버 변수를 초기화 할 것
		// 동작2: filename_weight와 filename_bias는 LOAD_INIT 모드일 경우 해당 파일로부터 가중치/바이어스를 불러옴
		// 동작3: init() 함수는 init_type를 입력으로 받아 가중치를 초기화 함
		// 함수1: dmatrix4D()와 dmatrix1D()를 사용하여 1차원, 4차원 배열을 동적 할당할 것
		weight_tensor = dmatrix4D(fK, fK, fC_in, fC_out);
		bias_tensor = dmatrix1D(fC_out);


		init(init_type);
	}
	void init(int init_type) {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작1: init_type (MEAN_INIT 또는 LOAD_INIT)에 따라 가중치를 다른 방식으로 초기화 함
		// 동작2: MEAN_INIT의 경우 필터는 평균값을 산출하는 필터가 됨 (즉, 모든 가중치 값이 필터의 크기(fK*fK*fC_in)의 역수와 같아짐 (이때 bias는 모두 0으로 설정)
		// 동작3: LOAD_INIT의 경우 filename_weight, filename_bias의 이름을 가지는 파일의 값을 읽어 가중치에 저장(초기화) 함
		// 함수1: dmatrix4D()와 dmatrix1D()를 사용하여 1차원, 4차원 배열을 동적 할당할 것
		if (init_type == MEAN_INIT) {
			double val = 1.0 / (fK * fK * fC_in);
			for (int y = 0; y < fK; y++) {
				for (int x = 0; x < fK; x++) {
					for (int c = 0; c < fC_in; c++) {
						for (int n = 0; n < fC_out; n++) {
							weight_tensor[y][x][c][n] = val;
						}
					}
				}
			}
			for (int n = 0; n < fC_out; n++) bias_tensor[n] = 0.0;
		}
		else if (init_type == LOAD_INIT) {
		std::ifstream weight_file(filename_weight);
        if (!weight_file.is_open()) {
            // 파일 열기 실패 시 에러 메시지 출력 후 프로그램 종료
            std::cerr << "Error: Could not open weight file: " << filename_weight << std::endl;
            exit(EXIT_FAILURE);
        }

        std::cout << "Loading weights from " << filename_weight << "..." << std::endl;
        for (int y = 0; y < fK; y++) {
            for (int x = 0; x < fK; x++) {
                for (int c = 0; c < fC_in; c++) {
                    for (int n = 0; n < fC_out; n++) {
                        // 파일에서 double 값을 하나씩 읽어와 텐서에 저장
                        weight_file >> weight_tensor[y][x][c][n];
                    }
                }
            }
        }
        weight_file.close(); // 파일 닫기

        // 2. Bias Tensor 불러오기
        std::ifstream bias_file(filename_bias);
        if (!bias_file.is_open()) {
            std::cerr << "Error: Could not open bias file: " << filename_bias << std::endl;
            exit(EXIT_FAILURE);
        }

        std::cout << "Loading biases from " << filename_bias << "..." << std::endl;
        for (int n = 0; n < fC_out; n++) {
            bias_file >> bias_tensor[n];
        }
        bias_file.close();}
	}
	~Layer_Conv() override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작1: weight_tensor와 bias_tensor를 동적 할당 해제할 것
		// 함수1: free_dmatrix4D(), free_dmatrix1D() 함수를 사용
		free_dmatrix4D(weight_tensor, fK, fK, fC_in, fC_out);
		free_dmatrix1D(bias_tensor, fC_out);
	}
	Tensor3D* forward(const Tensor3D* input) override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작1: 컨볼루션 (각 위치마다 y = WX + b)를 수행
		// 동작2: output (Tensor3D type)를 먼저 동적 할당하고 연산이 완료된 다음 pointer를 반환
		int H, W, C;
		input->get_info(H, W, C);
		assert(C == fC_in); // 입력 채널이 일치해야 함

		// 1. 'Same Padding'을 위한 패딩 값 계산
	// 3x3 커널일 경우 pad = 1, 5x5 커널일 경우 pad = 2
		int pad = fK / 2;

		// 2. 출력 텐서의 크기는 입력과 동일하게 설정
		int outH = H;
		int outW = W;

		Tensor3D* output = new Tensor3D(outH, outW, fC_out);

		// 출력 텐서의 모든 픽셀(oh, ow)에 대해 연산
		#pragma omp parallel for collapse(3) schedule(dynamic)
		for (int oh = 0; oh < outH; oh++) {
				for (int ow = 0; ow < outW; ow++) {
						for (int f = 0; f < fC_out; f++) {
								double sum = 0.0;
								for (int kh = 0; kh < fK; kh++) {
										for (int kw = 0; kw < fK; kw++) {
												for (int c = 0; c < fC_in; c++) {
														int ih = oh + kh - pad;
														int iw = ow + kw - pad;
														if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
																double val = input->get_elem(ih, iw, c);
																sum += val * weight_tensor[kh][kw][c][f];
														}
												}
										}
								}
								sum += bias_tensor[f];
								output->set_elem(oh, ow, f, sum);
						}
				}
		}
		cout  << name << " is finished" << endl;
		return output;
	};
	void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: Layer_ReLU와 동일
		_name = name;
		_fK = fK;
		_fC_in = fC_in;
		_fC_out = fC_out;
	}
	void print() const override {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: Layer_ReLU와 동일
		cout << "Layer: " << name << " (Conv) "
			<< "Kernel=" << fK
			<< " Cin=" << fC_in
			<< " Cout=" << fC_out << endl;
	}
};





Writing CLayer.h


In [5]:
%%writefile CModel.h
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include "CLayer.h"
using std::vector;
using std::string;
using std::cout;

using std::endl;
using std::setw;

// Model은 layer와 tensor들을 모두 통합 관리하여 효과적으로 CNN이 수행될 수 있도록 함

class Model {
private:
	vector<Layer*> layers; //layer들을 순차적으로 저장
	vector<Tensor3D*> tensors;// tensor들을 순차적으로 저장 ( 0번째 tensor는 0번째 layer의 입력, 마찬가지로 1번째 tensor는 1번째 layer의 입력이자 0번째 layer의 출력임)
public:
	Model() {}
	void add_layer(Layer* layer) {
		// (구현할 것) //////////////////////////////////////////////////
		// 동작: layer 객체를 layers vector의 마지막 element로 저장
		layers.push_back(layer);
	}
	~Model() {
		// (구현할 것)//////////////////////////////////////////////////
		// 동작: layers와 tensors의 모든 element를 동적할당 해제해 줄 것
		for (auto& l : layers) {
			delete l;
		}
		layers.clear();

		for (auto& t : tensors) {
			delete t;
		}
		tensors.clear();
	}
	void test(string filename_input, string filename_output) {
		// (구현할 것)//////////////////////////////////////////////////
		// 동작1: filename_input으로부터 이미지를 읽어와서, tensor로 변환한 다음 CNN을 수행한다음 그 결과물을 filename_output에 저장
		// 동작2: 주석 (1), (2), (3), (4) 중 (2)번만 구현하면 됨

		int nH, nW;
		double** input_img_Y, **input_img_U, **input_img_V;
		byte* pLoadImage;

		// (1) 영상을 읽어서 2차원 배열로 저장 (input_img_Y, U, V는 read_image에서 동적 할당됨)
		read_image(filename_input, pLoadImage, input_img_Y, input_img_U, input_img_V, nH, nW);
		cout << "Reading (" << filename_input << ") is complete..." << endl;


		// (2) 이부분만 구현할 것//////////////////////////////////////////////////
		// 동작1: 현재 tensors의 0번째 element에 영상(CNN의 입력)이 이미 저장되어 있음
		// 동작2: tensors vector의 i번째 tensor를 layers vector에 있는 i번째 layer의 forward함수로 입력받고, 그 결과를 tensors vector의 i+1번째 tensor로 저장함
		// 동작3: 결과적으로 tensors의 가장 마지막 tensor는 CNN의 출력값이 됨 (이 출력값은 (3)에서 1차원 배열로 변환되어 이미지 파일에 저장됨
		for (size_t i = 0; i < layers.size(); i++) {
			Tensor3D* input_tensor = tensors.at(i);            // i번째 tensor를 입력으로
			Tensor3D* output_tensor = layers.at(i)->forward(input_tensor); // layer forward
			tensors.push_back(output_tensor);                  // i+1번째 tensor로 추가

		}
		Tensor3D* input_tensor = tensors.at(0);
		Tensor3D* residual_tensor = tensors.at(tensors.size() - 1);

		int H, W, C;
		input_tensor->get_info(H, W, C);

		// 최종 결과를 저장할 새로운 텐서를 생성
		Tensor3D* final_image_tensor = new Tensor3D(H, W, C);

		for (int h = 0; h < H; h++) {
			for (int w = 0; w < W; w++) {
				double input_val = input_tensor->get_elem(h, w, 0);
				double residual_val = residual_tensor->get_elem(h, w, 0);
				// 원본 + 차이 값 = 최종 결과
				final_image_tensor->set_elem(h, w, 0, input_val + residual_val);
			}
		}
		// 완성된 최종 이미지를 텐서 목록의 맨 뒤에 추가
		tensors.push_back(final_image_tensor);

		cout << "Super-resolution is complete..." << endl;

		cout << "2";




		// (3) CNN의 출력(마지막 tensor)을 2차원 배열로 변환 후 U, V 채널과 함께 이미지로 저장
		Tensor3D* output_tensor_Y = tensors.at(tensors.size() - 1);
		output_tensor_Y->print();
		save_image(filename_output, pLoadImage, output_tensor_Y, input_img_U, input_img_V, nH, nW);
		cout << "Saving (" << filename_output << ") is complete..." << endl;

		cout << "3";

		// (4) 할당 해제
		free(pLoadImage);
		free_dmatrix2D(input_img_Y, nH, nW);
		free_dmatrix2D(input_img_U, nH, nW);
		free_dmatrix2D(input_img_V, nH, nW);

		cout << "4";
	}

	void read_image(const string filename, byte*& pLoadImage, double**& img_Y, double**& img_U, double**& img_V, int& nH, int& nW) {

		LoadBmp(filename.c_str(), &pLoadImage, nH, nW);///이미지파일 읽기

		img_Y = dmatrix2D(nH, nW);
		img_U = dmatrix2D(nH, nW);
		img_V = dmatrix2D(nH, nW);

		convert1Dto2D(pLoadImage, img_Y, img_U, img_V, nH, nW);

		// 입력 영상을 tensor로 변환 후 첫번째 element에 저장
		double*** inImage3D = dmatrix3D(nH, nW, 1);
		convert2Dto3D(img_Y, inImage3D, nH, nW);

		Tensor3D* temp = new Tensor3D(nH, nW, 1);
		temp->set_tensor(inImage3D);
		tensors.push_back(temp);

	}
	void save_image(string filename, byte*& pLoadImage, Tensor3D*& tensor_Y, double** img_U, double** img_V, int nH, int nW) {
		double** img_Y = dmatrix2D(nH, nW);
		convert3Dto2D(tensor_Y->get_tensor(), img_Y, nH, nW);
		convert2Dto1D(img_Y, img_U, img_V, pLoadImage, nH, nW);
		SaveBmp(filename.c_str(), pLoadImage, nH, nW);
		free_dmatrix2D(img_Y, nH, nW);
	}
	void print_layer_info() const {
		cout << endl << "(Layer information)_____________" << endl;
		for (unsigned i = 0; i < layers.size(); i++) {
			cout << i + 1 << "-th layer: ";
			layers.at(i)->print();
		}
	}
	void print_tensor_info() const {
		cout << endl << "(Tensor information)_____________" << endl;
		for (unsigned i = 0; i < tensors.size(); i++) {
			cout << i + 1 << "-th tensor: ";
			tensors.at(i)->print();
		}
	}

	//	void train();
};

Writing CModel.h


In [6]:
%%writefile main.cpp

#include "Imagelib.h"
#include "CModel.h"
#include "CTensor.h"
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "CModel.h"
using namespace std;

// 정확하게 동작시 20점 (부분점수 없음)

int main() {
	Model model;
	double start_time = omp_get_wtime();
	// build model
	model.add_layer(new Layer_Conv("Conv1", 9, 1, 64, LOAD_INIT, "/content/model/weights_conv1_9x9x1x64.txt", "/content/model/biases_conv1_64.txt"));
	model.add_layer(new Layer_ReLU("Relu1", 1, 64, 64));
	model.add_layer(new Layer_Conv("Conv2", 5, 64, 32, LOAD_INIT, "/content/model/weights_conv2_5x5x64x32.txt", "/content/model/biases_conv2_32.txt"));
	model.add_layer(new Layer_ReLU("Relu2", 1, 32, 32));
	model.add_layer(new Layer_Conv("Conv3", 5, 32, 1, LOAD_INIT, "/content/model/weights_conv3_5x5x32x1.txt", "/content/model/biases_conv3_1.txt"));


	model.test("/content/baby_512x512_input.bmp", "/content/baby_512x512_output_srcnn.bmp");

	model.print_layer_info();
	model.print_tensor_info();
	system("PAUSE");
	double end_time = omp_get_wtime();
	std::cout << " took " << (end_time - start_time) << " seconds.\n";
	return 0;
}

Writing main.cpp


In [7]:
# 1. 컴파일 (출력 실행 파일 이름은 main으로)
!g++ -std=c++17 -O2 -Wall main.cpp -fopenmp -o main

# 2. 실행
!./main

In file included from [01m[KCModel.h:6[m[K,
                 from [01m[Kmain.cpp:3[m[K:
[01m[KCLayer.h:[m[K In constructor ‘[01m[KLayer::Layer(std::string, int, int, int)[m[K’:
   23 |         string [01;35m[Kname[m[K;
      |                [01;35m[K^~~~[m[K
   20 |         int [01;35m[KfK[m[K; // kernel size in K*K kernel
      |             [01;35m[K^~[m[K
   25 |         [01;35m[KLayer[m[K(string _name, int _fK, int _fC_in, int _fC_out) : name(_name), fK(_fK), fC_in(_fC_in), fC_out(_fC_out) {}
      |         [01;35m[K^~~~~[m[K
[01m[Kmain.cpp:[m[K In function ‘[01m[Kint main()[m[K’:
   34 |         [01;35m[Ksystem("PAUSE")[m[K;
      |         [01;35m[K~~~~~~^~~~~~~~~[m[K
Loading weights from /content/model/weights_conv1_9x9x1x64.txt...
Loading biases from /content/model/biases_conv1_64.txt...
Loading weights from /content/model/weights_conv2_5x5x64x32.txt...
Loading biases from /content/model/biases_conv2_32.txt...
Loading we

### 병렬처리 이후

In [8]:
%%writefile Imagelib.h
#pragma once
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <iostream>
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>
#include <iomanip>
using byte = unsigned char;
#define LOG_OUT(_x_)
#define LOG_OUT_W(_x_)
#define LOG_OUT_A(_x_)
#pragma pack(push, 1)
typedef struct {
    unsigned short bfType;
    unsigned int   bfSize;
    unsigned short bfReserved1;
    unsigned short bfReserved2;
    unsigned int   bfOffBits;
} BITMAPFILEHEADER;

typedef struct {
    unsigned int   biSize;
    int            biWidth;
    int            biHeight;
    unsigned short biPlanes;
    unsigned short biBitCount;
    unsigned int   biCompression;
    unsigned int   biSizeImage;
    int            biXPelsPerMeter;
    int            biYPelsPerMeter;
    unsigned int   biClrUsed;
    unsigned int   biClrImportant;
} BITMAPINFOHEADER;
#pragma pack(pop)
static inline uint32_t row_stride_24(int w) {
    return (uint32_t)(((w * 3) + 3) & ~3u);
}
// ---- 선언 ----
bool LoadBmp(const char* filename, byte** pImage, int& height, int& width);
bool SaveBmp(const char* filename, byte* pImage, int height, int width);
bool convert1Dto2D(byte* src, double** dst_Y, double** dst_U, double** dst_V, int height, int width);
bool convert2Dto1D(double** src_Y, double** src_U, double** src_V, byte* dst, int height, int width);
void convert2Dto3D(double **src2D, double ***dst3D, int height, int width);
void convert3Dto2D(double ***src3D, double **dst2D, int height, int width);
double *dmatrix1D(int nH);
double **dmatrix2D(int nH, int nW);
double ***dmatrix3D(int nH, int nW, int nC);
double ****dmatrix4D(int nH, int nW, int nC, int nNum);
void free_dmatrix1D(double *Image, int nH);
void free_dmatrix2D(double **Image, int nH, int nW);
void free_dmatrix3D(double ***Image, int nH, int nW, int nC);
void free_dmatrix4D(double ****Image, int nH, int nW, int nC, int nNum);
double clip(double x, double minVal, double maxVal);
double** simpleUpsampling2x(double **Image, int nH, int nW);


// ---- 정의 ----
bool LoadBmp(const char* filename, byte** pImage, int& height, int& width) {
    *pImage = nullptr;
    std::FILE* fp = std::fopen(filename, "rb");
    if (!fp) { LOG_OUT_A("fopen() error"); return false; }
    BITMAPFILEHEADER bmf{};
    BITMAPINFOHEADER bmi{};
    if (std::fread(&bmf, sizeof(bmf), 1, fp) != 1) { std::fclose(fp); return false; }
    if (bmf.bfType != 0x4D42) { std::fclose(fp); LOG_OUT_A("not .bmp file"); return false; }
    if (std::fread(&bmi, sizeof(bmi), 1, fp) != 1) { std::fclose(fp); return false; }
    if (bmi.biBitCount != 24 || bmi.biCompression != 0 /*BI_RGB*/) {
        std::fclose(fp); LOG_OUT_A("only 24-bit BI_RGB supported"); return false;
    }
    width  = bmi.biWidth;
    height = (bmi.biHeight >= 0) ? bmi.biHeight : -bmi.biHeight;
    const bool bottom_up = (bmi.biHeight > 0);
    const uint32_t stride = row_stride_24(width);
    const uint32_t data_bytes = stride * (uint32_t)height;
    // 픽셀 데이터 위치로 이동
    if (bmf.bfOffBits > sizeof(bmf) + sizeof(bmi)) {
        std::fseek(fp, (long)bmf.bfOffBits, SEEK_SET);
    }
    // 원본(패딩 포함) 읽기
    std::vector<unsigned char> buf(data_bytes);
    if (std::fread(buf.data(), 1, data_bytes, fp) != data_bytes) { std::fclose(fp); return false; }
    std::fclose(fp);
    // 호출자용 포맷: 패딩 없음, top→bottom, BGR 연속 메모리
    *pImage = (byte*)std::malloc((size_t)width * height * 3);
    if (!*pImage) return false;
    for (int y = 0; y < height; ++y) {
        int src_y = bottom_up ? (height - 1 - y) : y;
        const unsigned char* src = buf.data() + (size_t)src_y * stride;
        byte* dst = *pImage + (size_t)y * width * 3;
        std::memcpy(dst, src, (size_t)width * 3);
    }
    return true;
}
bool SaveBmp(const char* filename, byte* pImage, int height, int width) {
    // pImage: top→bottom, 패딩 없음, BGR 연속
    const uint32_t stride = row_stride_24(width);
    const uint32_t data_bytes = stride * (uint32_t)height;
    BITMAPFILEHEADER bmf{};
    BITMAPINFOHEADER bmi{};
    bmi.biSize = sizeof(BITMAPINFOHEADER);
    bmi.biWidth = width;
    bmi.biHeight = height;        // bottom-up 저장(양수)
    bmi.biPlanes = 1;
    bmi.biBitCount = 24;
    bmi.biCompression = 0;        // BI_RGB
    bmi.biSizeImage = data_bytes;
    bmf.bfType = 0x4D42; // 'BM'
    bmf.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
    bmf.bfSize = bmf.bfOffBits + data_bytes;
    std::FILE* fp = std::fopen(filename, "wb");
    if (!fp) { LOG_OUT_A("fopen() error"); return false; }
    std::fwrite(&bmf, 1, sizeof(bmf), fp);
    std::fwrite(&bmi, 1, sizeof(bmi), fp);
    std::vector<unsigned char> row(stride, 0);
    for (int y = height - 1; y >= 0; --y) {
        const byte* src = pImage + (size_t)y * width * 3;
        std::memcpy(row.data(), src, (size_t)width * 3);
        std::fwrite(row.data(), 1, stride, fp);
    }
    std::fclose(fp);
    return true;
}
bool convert1Dto2D(byte* src, double** dst_Y, double** dst_U, double** dst_V, int height, int width) {
    int iR, iG, iB;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            iB = src[3 * width * y + 3 * x + 0];
            iG = src[3 * width * y + 3 * x + 1];
            iR = src[3 * width * y + 3 * x + 2];
            dst_Y[y][x] = iR * 0.299 + iG * 0.587 + iB * 0.114;
            dst_U[y][x] = (iB - dst_Y[y][x]) * 0.565;
            dst_V[y][x] = (iR - dst_Y[y][x]) * 0.713;
            dst_Y[y][x] = dst_Y[y][x] / 255.0; // [0,255] → [0,1]
        }
    }
    return true;
}
bool convert2Dto1D(double** src_Y, double** src_U, double** src_V, byte* dst, int height, int width) {
    int iCount = 0;
    int iR, iG, iB;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            double Y = src_Y[y][x] * 255.0;
            iR = (int)clip(Y + 1.403 * src_V[y][x], 0, 255);
            iG = (int)clip(Y - 0.344 * src_U[y][x] - 0.714 * src_V[y][x], 0, 255);
            iB = (int)clip(Y + 1.770 * src_U[y][x], 0, 255);
            dst[iCount + 0] = (byte)iB;
            dst[iCount + 1] = (byte)iG;
            dst[iCount + 2] = (byte)iR;
            iCount += 3;
        }
    }
    return true;
}
double clip(double x, double minVal, double maxVal) {
    if (x < minVal) x = minVal;
    if (x > maxVal) x = maxVal;
    return x;
}
double** simpleUpsampling2x(double **Image, int nH, int nW) {
    double** outImg = dmatrix2D(nH * 2, nW * 2);
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) {
            outImg[2 * y + 0][2 * x + 0] = Image[y][x];
            outImg[2 * y + 0][2 * x + 1] = Image[y][x];
            outImg[2 * y + 1][2 * x + 0] = Image[y][x];
            outImg[2 * y + 1][2 * x + 1] = Image[y][x];
        }
    }
    return outImg;
}
double *dmatrix1D(int nH) {
    return new double[nH]();
}
double **dmatrix2D(int nH, int nW) {
    double **Temp = new double*[nH];
    for (int y = 0; y < nH; y++) Temp[y] = new double[nW]();
    return Temp;
}
double ***dmatrix3D(int nH, int nW, int nC) {
    double ***Temp = new double**[nH];
    for (int y = 0; y < nH; y++) {
        Temp[y] = new double*[nW];
        for (int x = 0; x < nW; x++) Temp[y][x] = new double[nC]();
    }
    return Temp;
}
double ****dmatrix4D(int nH, int nW, int nC, int nNum) {
    double ****Temp = new double***[nH];
    for (int y = 0; y < nH; y++) {
        Temp[y] = new double**[nW];
        for (int x = 0; x < nW; x++) {
            Temp[y][x] = new double*[nC];
            for (int c = 0; c < nC; c++) Temp[y][x][c] = new double[nNum]();
        }
    }
    return Temp;
}
void free_dmatrix1D(double *Image, int) { delete[] Image; }
void free_dmatrix2D(double **Image, int nH, int) {
    for (int y = 0; y < nH; y++) delete[] Image[y];
    delete[] Image;
}
void free_dmatrix3D(double ***Image, int nH, int nW, int) {
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) delete[] Image[y][x];
        delete[] Image[y];
    }
    delete[] Image;
}
void free_dmatrix4D(double ****Image, int nH, int nW, int nC, int) {
    for (int y = 0; y < nH; y++) {
        for (int x = 0; x < nW; x++) {
            for (int c = 0; c < nC; c++) delete[] Image[y][x][c];
            delete[] Image[y][x];
        }
        delete[] Image[y];
    }
    delete[] Image;
}
void convert2Dto3D(double **src2D, double ***dst3D, int height, int width) {
    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++)
            dst3D[y][x][0] = src2D[y][x];
}
void convert3Dto2D(double ***src3D, double **dst2D, int height, int width) {
    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++)
            dst2D[y][x] = src3D[y][x][0];
}

Overwriting Imagelib.h


In [9]:
%%writefile CTensor.h

#pragma once
#include "Imagelib.h"
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <string>
using std::cout;
using std::endl;
using std::string;

// Tensor3D는 크기가 (nH x nW x nC)인 3차원 tensor를 관리함

class Tensor3D {
private:
	double*** tensor;
	int nH; // height
	int nW; // width
	int nC; // channel
public:
	Tensor3D(int _nH, int _nW, int _nC) : nH(_nH), nW(_nW), nC(_nC) {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작:
		//
		// 사용함수: dmatrix3D(): 3차원 행렬을 동적 할당해서 pointer를 반환하는 함수
		tensor = dmatrix3D(nH, nW, nC);
	}
	~Tensor3D() {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 3차원 동적 배열인 tensor를 할당 해제
		// 사용함수: free_dmatrix3D(): 3차원 동적 할당된 행렬을 할당 해제하는 함수
		free_dmatrix3D(tensor, nH, nW, nC);
	}
	void set_elem(int _h, int _w, int _c, double _val) { tensor[_h][_w][_c] = _val; }
	double get_elem(int _h, int _w, int _c)	const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행=_h, 열= _w, 채널= _c 위치 element를 반환할 것
		return tensor[_h][_w][_c];
	}

	void get_info(int& _nH, int& _nW, int& _nC) const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행렬의 차원(nH, nW, nC)을 pass by reference로 반환
		_nH = nH;
		_nW = nW;
		_nC = nC;
	}

	void set_tensor(double*** _tensor) { tensor = _tensor; }
	double*** get_tensor() const { return tensor; }

	void print() const {
		// (구현할 것)//////////////////////////////////////////////////////////////////////
		// 동작: 행렬의 크기 (nH*nW*nC)를 화면에 출력
		printf("Tensor size: %d x %d x %d\n", nH, nW, nC);
	}
};

Overwriting CTensor.h


In [10]:
%%writefile CLayer.h
#pragma once
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <string>
#include <omp.h>
#include "Imagelib.h"
#include "CTensor.h"
#define MEAN_INIT 0
#define LOAD_INIT 1
using std::cout;
using std::endl;
using std::string;

class Layer {
protected:
	int fK; // kernel size in K*K kernel
	int fC_in; // number of channels
	int fC_out; //number of filters
	string name;
public:
	Layer(string _name, int _fK, int _fC_in, int _fC_out) : name(_name), fK(_fK), fC_in(_fC_in), fC_out(_fC_out) {}
	virtual ~Layer() {};
	virtual Tensor3D* forward(const Tensor3D* input) = 0;
	virtual void print() const = 0;
	virtual void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const = 0;
};

class Layer_ReLU : public Layer {
public:
	Layer_ReLU(string _name, int _fK, int _fC_in, int _fC_out)
		: Layer(_name, _fK, _fC_in, _fC_out) {}
	~Layer_ReLU() {}

	Tensor3D* forward(const Tensor3D* input) override {
		int H, W, C;
		input->get_info(H, W, C);

		Tensor3D* output = new Tensor3D(H, W, C);

		#pragma omp parallel for collapse(3)
		for (int h = 0; h < H; h++) {
			for (int w = 0; w < W; w++) {
				for (int c = 0; c < C; c++) {
					double val = input->get_elem(h, w, c);
					output->set_elem(h, w, c, (val > 0) ? val : 0.0);
				}
			}
		}

		cout << name << " is finished" << endl;
		return output;
	};

	void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const override {
		_name = name;
		_fK = fK;
		_fC_in = fC_in;
		_fC_out = fC_out;
	}

	void print() const override {
		cout << "Layer: " << name << " (ReLU) "
			<< "Kernel=" << fK
			<< " Cin=" << fC_in
			<< " Cout=" << fC_out << endl;
	}
};

class Layer_Conv : public Layer {
private:
	string filename_weight;
	string filename_bias;
	double**** weight_tensor;
	double*  bias_tensor;
public:
	Layer_Conv(string _name, int _fK, int _fC_in, int _fC_out, int init_type, string _filename_weight = "", string _filename_bias = "")
	:Layer(_name, _fK, _fC_in, _fC_out), filename_weight(_filename_weight), filename_bias(_filename_bias)
	{
		weight_tensor = dmatrix4D(fK, fK, fC_in, fC_out);
		bias_tensor = dmatrix1D(fC_out);
		init(init_type);
	}

	void init(int init_type) {
		if (init_type == MEAN_INIT) {
			double val = 1.0 / (fK * fK * fC_in);
			for (int y = 0; y < fK; y++)
				for (int x = 0; x < fK; x++)
					for (int c = 0; c < fC_in; c++)
						for (int n = 0; n < fC_out; n++)
							weight_tensor[y][x][c][n] = val;
			for (int n = 0; n < fC_out; n++) bias_tensor[n] = 0.0;
		}
		else if (init_type == LOAD_INIT) {
			std::ifstream weight_file(filename_weight);
			if (!weight_file.is_open()) { std::cerr << "Error: Could not open weight file: " << filename_weight << std::endl; exit(EXIT_FAILURE); }
			for (int y = 0; y < fK; y++)
				for (int x = 0; x < fK; x++)
					for (int c = 0; c < fC_in; c++)
						for (int n = 0; n < fC_out; n++)
							weight_file >> weight_tensor[y][x][c][n];
			weight_file.close();

			std::ifstream bias_file(filename_bias);
			if (!bias_file.is_open()) { std::cerr << "Error: Could not open bias file: " << filename_bias << std::endl; exit(EXIT_FAILURE); }
			for (int n = 0; n < fC_out; n++) bias_file >> bias_tensor[n];
			bias_file.close();
		}
	}

	~Layer_Conv() override {
		free_dmatrix4D(weight_tensor, fK, fK, fC_in, fC_out);
		free_dmatrix1D(bias_tensor, fC_out);
	}

	Tensor3D* forward(const Tensor3D* input) override {
		int H, W, C;
		input->get_info(H, W, C);
		assert(C == fC_in);

		int pad = fK / 2;
		int outH = H, outW = W;
		Tensor3D* output = new Tensor3D(outH, outW, fC_out);

		#pragma omp parallel for collapse(3) schedule(dynamic)
		for (int oh = 0; oh < outH; oh++) {
			for (int ow = 0; ow < outW; ow++) {
				for (int f = 0; f < fC_out; f++) {
					double sum = 0.0;
					for (int kh = 0; kh < fK; kh++) {
						for (int kw = 0; kw < fK; kw++) {
							for (int c = 0; c < fC_in; c++) {
								int ih = oh + kh - pad;
								int iw = ow + kw - pad;
								if (ih >= 0 && ih < H && iw >= 0 && iw < W)
									sum += input->get_elem(ih, iw, c) * weight_tensor[kh][kw][c][f];
							}
						}
					}
					sum += bias_tensor[f];
					output->set_elem(oh, ow, f, sum);
				}
			}
		}

		cout << name << " is finished" << endl;
		return output;
	}

	void get_info(string& _name, int& _fK, int& _fC_in, int& _fC_out) const override {
		_name = name;
		_fK = fK;
		_fC_in = fC_in;
		_fC_out = fC_out;
	}

	void print() const override {
		cout << "Layer: " << name << " (Conv) "
			<< "Kernel=" << fK
			<< " Cin=" << fC_in
			<< " Cout=" << fC_out << endl;
	}
};


Overwriting CLayer.h


In [11]:
%%writefile CModel.h
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include "CLayer.h"
using std::vector;
using std::string;
using std::cout;

using std::endl;
using std::setw;

// Model은 layer와 tensor들을 모두 통합 관리하여 효과적으로 CNN이 수행될 수 있도록 함

class Model {
private:
	vector<Layer*> layers; //layer들을 순차적으로 저장
	vector<Tensor3D*> tensors;// tensor들을 순차적으로 저장 ( 0번째 tensor는 0번째 layer의 입력, 마찬가지로 1번째 tensor는 1번째 layer의 입력이자 0번째 layer의 출력임)
public:
	Model() {}
	void add_layer(Layer* layer) {
		// (구현할 것) //////////////////////////////////////////////////
		// 동작: layer 객체를 layers vector의 마지막 element로 저장
		layers.push_back(layer);
	}
	~Model() {
		// (구현할 것)//////////////////////////////////////////////////
		// 동작: layers와 tensors의 모든 element를 동적할당 해제해 줄 것
		for (auto& l : layers) {
			delete l;
		}
		layers.clear();

		for (auto& t : tensors) {
			delete t;
		}
		tensors.clear();
	}
	void test(string filename_input, string filename_output) {
		// (구현할 것)//////////////////////////////////////////////////
		// 동작1: filename_input으로부터 이미지를 읽어와서, tensor로 변환한 다음 CNN을 수행한다음 그 결과물을 filename_output에 저장
		// 동작2: 주석 (1), (2), (3), (4) 중 (2)번만 구현하면 됨

		int nH, nW;
		double** input_img_Y, **input_img_U, **input_img_V;
		byte* pLoadImage;

		// (1) 영상을 읽어서 2차원 배열로 저장 (input_img_Y, U, V는 read_image에서 동적 할당됨)
		read_image(filename_input, pLoadImage, input_img_Y, input_img_U, input_img_V, nH, nW);
		cout << "Reading (" << filename_input << ") is complete..." << endl;


		// (2) 이부분만 구현할 것//////////////////////////////////////////////////
		// 동작1: 현재 tensors의 0번째 element에 영상(CNN의 입력)이 이미 저장되어 있음
		// 동작2: tensors vector의 i번째 tensor를 layers vector에 있는 i번째 layer의 forward함수로 입력받고, 그 결과를 tensors vector의 i+1번째 tensor로 저장함
		// 동작3: 결과적으로 tensors의 가장 마지막 tensor는 CNN의 출력값이 됨 (이 출력값은 (3)에서 1차원 배열로 변환되어 이미지 파일에 저장됨
		for (size_t i = 0; i < layers.size(); i++) {
			Tensor3D* input_tensor = tensors.at(i);            // i번째 tensor를 입력으로
			Tensor3D* output_tensor = layers.at(i)->forward(input_tensor); // layer forward
			tensors.push_back(output_tensor);                  // i+1번째 tensor로 추가

		}
		Tensor3D* input_tensor = tensors.at(0);
		Tensor3D* residual_tensor = tensors.at(tensors.size() - 1);

		int H, W, C;
		input_tensor->get_info(H, W, C);

		// 최종 결과를 저장할 새로운 텐서를 생성
		Tensor3D* final_image_tensor = new Tensor3D(H, W, C);

		for (int h = 0; h < H; h++) {
			for (int w = 0; w < W; w++) {
				double input_val = input_tensor->get_elem(h, w, 0);
				double residual_val = residual_tensor->get_elem(h, w, 0);
				// 원본 + 차이 값 = 최종 결과
				final_image_tensor->set_elem(h, w, 0, input_val + residual_val);
			}
		}
		// 완성된 최종 이미지를 텐서 목록의 맨 뒤에 추가
		tensors.push_back(final_image_tensor);

		cout << "Super-resolution is complete..." << endl;

		cout << "2";




		// (3) CNN의 출력(마지막 tensor)을 2차원 배열로 변환 후 U, V 채널과 함께 이미지로 저장
		Tensor3D* output_tensor_Y = tensors.at(tensors.size() - 1);
		output_tensor_Y->print();
		save_image(filename_output, pLoadImage, output_tensor_Y, input_img_U, input_img_V, nH, nW);
		cout << "Saving (" << filename_output << ") is complete..." << endl;

		cout << "3";

		// (4) 할당 해제
		free(pLoadImage);
		free_dmatrix2D(input_img_Y, nH, nW);
		free_dmatrix2D(input_img_U, nH, nW);
		free_dmatrix2D(input_img_V, nH, nW);

		cout << "4";
	}

	void read_image(const string filename, byte*& pLoadImage, double**& img_Y, double**& img_U, double**& img_V, int& nH, int& nW) {

		LoadBmp(filename.c_str(), &pLoadImage, nH, nW);///이미지파일 읽기

		img_Y = dmatrix2D(nH, nW);
		img_U = dmatrix2D(nH, nW);
		img_V = dmatrix2D(nH, nW);

		convert1Dto2D(pLoadImage, img_Y, img_U, img_V, nH, nW);

		// 입력 영상을 tensor로 변환 후 첫번째 element에 저장
		double*** inImage3D = dmatrix3D(nH, nW, 1);
		convert2Dto3D(img_Y, inImage3D, nH, nW);

		Tensor3D* temp = new Tensor3D(nH, nW, 1);
		temp->set_tensor(inImage3D);
		tensors.push_back(temp);

	}
	void save_image(string filename, byte*& pLoadImage, Tensor3D*& tensor_Y, double** img_U, double** img_V, int nH, int nW) {
		double** img_Y = dmatrix2D(nH, nW);
		convert3Dto2D(tensor_Y->get_tensor(), img_Y, nH, nW);
		convert2Dto1D(img_Y, img_U, img_V, pLoadImage, nH, nW);
		SaveBmp(filename.c_str(), pLoadImage, nH, nW);
		free_dmatrix2D(img_Y, nH, nW);
	}
	void print_layer_info() const {
		cout << endl << "(Layer information)_____________" << endl;
		for (unsigned i = 0; i < layers.size(); i++) {
			cout << i + 1 << "-th layer: ";
			layers.at(i)->print();
		}
	}
	void print_tensor_info() const {
		cout << endl << "(Tensor information)_____________" << endl;
		for (unsigned i = 0; i < tensors.size(); i++) {
			cout << i + 1 << "-th tensor: ";
			tensors.at(i)->print();
		}
	}

	//	void train();
};

Overwriting CModel.h


In [12]:
%%writefile main_p.cpp

#include "Imagelib.h"
#include "CModel.h"
#include "CTensor.h"
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "CModel.h"
using namespace std;

// 정확하게 동작시 20점 (부분점수 없음)

int main() {
	Model model;
	double start_time = omp_get_wtime();
	// build model
	model.add_layer(new Layer_Conv("Conv1", 9, 1, 64, LOAD_INIT, "/content/model/weights_conv1_9x9x1x64.txt", "/content/model/biases_conv1_64.txt"));
	model.add_layer(new Layer_ReLU("Relu1", 1, 64, 64));
	model.add_layer(new Layer_Conv("Conv2", 5, 64, 32, LOAD_INIT, "/content/model/weights_conv2_5x5x64x32.txt", "/content/model/biases_conv2_32.txt"));
	model.add_layer(new Layer_ReLU("Relu2", 1, 32, 32));
	model.add_layer(new Layer_Conv("Conv3", 5, 32, 1, LOAD_INIT, "/content/model/weights_conv3_5x5x32x1.txt", "/content/model/biases_conv3_1.txt"));


	model.test("/content/baby_512x512_input.bmp", "/content/baby_512x512_output_srcnn.bmp");

	model.print_layer_info();
	model.print_tensor_info();
	system("PAUSE");
	double end_time = omp_get_wtime();
	std::cout << " took " << (end_time - start_time) << " seconds.\n";
	return 0;
}

Writing main_p.cpp


In [13]:
# 1. 컴파일 (출력 실행 파일 이름은 main으로)
!g++ -std=c++17 -O2 -Wall main_p.cpp -fopenmp -o main_p

# 2. 실행
!./main_p

In file included from [01m[KCModel.h:6[m[K,
                 from [01m[Kmain_p.cpp:3[m[K:
[01m[KCLayer.h:[m[K In constructor ‘[01m[KLayer::Layer(std::string, int, int, int)[m[K’:
   20 |         string [01;35m[Kname[m[K;
      |                [01;35m[K^~~~[m[K
   17 |         int [01;35m[KfK[m[K; // kernel size in K*K kernel
      |             [01;35m[K^~[m[K
   22 |         [01;35m[KLayer[m[K(string _name, int _fK, int _fC_in, int _fC_out) : name(_name), fK(_fK), fC_in(_fC_in), fC_out(_fC_out) {}
      |         [01;35m[K^~~~~[m[K
[01m[Kmain_p.cpp:[m[K In function ‘[01m[Kint main()[m[K’:
   34 |         [01;35m[Ksystem("PAUSE")[m[K;
      |         [01;35m[K~~~~~~^~~~~~~~~[m[K
Reading (/content/baby_512x512_input.bmp) is complete...
Conv1 is finished
Relu1 is finished
Conv2 is finished
Relu2 is finished
Conv3 is finished
Super-resolution is complete...
2Tensor size: 512 x 512 x 1
Saving (/content/baby_512x512_output_srcnn.bmp) 