Skip to content

MLX90640_CalculateTo() speed improvement #11

@bjiirn

Description

@bjiirn

I noticed that the MLX90640_CalculateTo() routine is very slow and has a lot of improvement potential.
With just a few changes i was able to make it about 10 times as fast (on my ESP8266).
With that i got the total speed from <2Hz to ~8Hz.
The lines i changed have the original line as a comment on top.
And i added the Qsqrt() function th get the square root faster.
`

float Qsqrt( float number )	// fast inverse square root implementation from Quake III Arena
{
	long i;
	float x2, y;
	const float threehalfs = 1.5F;

	x2 = number * 0.5F;
	y  = number;
	i  = * ( long * ) &y;                       // evil floating point bit level hacking
	i  = 0x5f3759df - ( i >> 1 );               // what the fuck? 
	y  = * ( float * ) &i;
	y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
//	y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed

	return 1.0/y; // modified to return actual square root
}
void MLX90640_CalculateTo(uint16_t *frameData, const paramsMLX90640 *params, float emissivity, float tr, float *result)
{
	float vdd;
	float ta;
	float ta4;
	float tr4;
	float taTr;
	float gain;
	float irDataCP[2];
	float irData;
	float alphaCompensated;
	uint8_t mode;
	int8_t ilPattern;
	int8_t chessPattern;
	int8_t pattern;
	int8_t conversionPattern;
	float Sx;
	float To;
	float alphaCorrR[4];
	int8_t range;
	uint16_t subPage;
	
	subPage = frameData[833];
	vdd = MLX90640_GetVdd(frameData, params);
	ta = MLX90640_GetTa(frameData, params);
	ta4 = pow((ta + 273.15), (double)4);
	tr4 = pow((tr + 273.15), (double)4);
	taTr = tr4 - (tr4-ta4)/emissivity;
	
	alphaCorrR[0] = 1 / (1 + params->ksTo[0] * 40);
	alphaCorrR[1] = 1 ;
	alphaCorrR[2] = (1 + params->ksTo[2] * params->ct[2]);
	alphaCorrR[3] = alphaCorrR[2] * (1 + params->ksTo[3] * (params->ct[3] - params->ct[2]));
	
//------------------------- Gain calculation -----------------------------------    
	gain = frameData[778];
	if(gain > 32767)
	{
		gain = gain - 65536;
	}
	
	gain = params->gainEE / gain; 

//------------------------- To calculation -------------------------------------    
	mode = (frameData[832] & 0x1000) >> 5;
	
	irDataCP[0] = frameData[776];  
	irDataCP[1] = frameData[808];
	for( int i = 0; i < 2; i++)
	{
		if(irDataCP[i] > 32767)
		{
			irDataCP[i] = irDataCP[i] - 65536;
		}
		irDataCP[i] = irDataCP[i] * gain;
	}
	irDataCP[0] = irDataCP[0] - params->cpOffset[0] * (1 + params->cpKta * (ta - 25)) * (1 + params->cpKv * (vdd - 3.3));
	if( mode ==  params->calibrationModeEE)
	{
		irDataCP[1] = irDataCP[1] - params->cpOffset[1] * (1 + params->cpKta * (ta - 25)) * (1 + params->cpKv * (vdd - 3.3));
	}
	else
	{
	irDataCP[1] = irDataCP[1] - (params->cpOffset[1] + params->ilChessC[0]) * (1 + params->cpKta * (ta - 25)) * (1 + params->cpKv * (vdd - 3.3));
	}

	for( int pixelNumber = 0; pixelNumber < 768; pixelNumber++)
	{
		//ilPattern = pixelNumber / 32 - (pixelNumber / 64) * 2;
		ilPattern = (pixelNumber>>5)&1;
		//chessPattern = ilPattern ^ (pixelNumber - (pixelNumber/2)*2); 
		chessPattern = ilPattern ^ (pixelNumber&1); 
		conversionPattern = ((pixelNumber + 2) / 4 - (pixelNumber + 3) / 4 + (pixelNumber + 1) / 4 - pixelNumber / 4) * (1 - 2 * ilPattern);
		
		if(mode == 0)
		{
		pattern = ilPattern; 
		}
		else 
		{
		pattern = chessPattern; 
		}               
		if(pattern == frameData[833])
		{    

			irData = frameData[pixelNumber];
			if(irData > 32767)
			{
				irData = irData - 65536;
			}
			irData = irData * gain;
			
			irData = irData - params->offset[pixelNumber]*(1 + params->kta[pixelNumber]*(ta - 25))*(1 + params->kv[pixelNumber]*(vdd - 3.3));
			if(mode !=  params->calibrationModeEE)
			{
			irData = irData + params->ilChessC[2] * (2 * ilPattern - 1) - params->ilChessC[1] * conversionPattern; 
			}
			
			irData = irData / emissivity;
	
			irData = irData - params->tgc * irDataCP[subPage];

			alphaCompensated = (params->alpha[pixelNumber] - params->tgc * params->cpAlpha[subPage])*(1 + params->KsTa * (ta - 25));
			
			//Sx = pow((double)alphaCompensated, (double)3) * (irData + alphaCompensated * taTr);
			Sx = alphaCompensated*alphaCompensated*alphaCompensated * (irData + alphaCompensated * taTr);
			
			//Sx = sqrt(sqrt(Sx)) * params->ksTo[1];
			Sx = Qsqrt(Qsqrt(Sx)) * params->ksTo[1];
			
			//To = sqrt(sqrt(irData/(alphaCompensated * (1 - params->ksTo[1] * 273.15) + Sx) + taTr)) - 273.15;
			To = Qsqrt(Qsqrt(irData/(alphaCompensated * (1 - params->ksTo[1] * 273.15) + Sx) + taTr)) - 273.15;

			if(To < params->ct[1])
			{
				range = 0;
			}
			else if(To < params->ct[2])   
			{
				range = 1;            
			}   
			else if(To < params->ct[3])
			{
				range = 2;            
			}
			else
			{
				range = 3;            
			}      
			
			//To = sqrt(sqrt(irData / (alphaCompensated * alphaCorrR[range] * (1 + params->ksTo[range] * (To - params->ct[range]))) + taTr)) - 273.15;
			To = Qsqrt(Qsqrt(irData / (alphaCompensated * alphaCorrR[range] * (1 + params->ksTo[range] * (To - params->ct[range]))) + taTr)) - 273.15;
			
			result[pixelNumber] = To;
		}
	}
}

`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions